From e710669c8432bc17d6682396c104603c09f6429f Mon Sep 17 00:00:00 2001
From: "martin.fencl"
Date: Tue, 23 Dec 2025 23:07:31 +0100
Subject: [PATCH] redo

---
Review notes (text in this area is ignored when the patch is applied):
  * Wraps every remote RAID command in `timeout -k 5 <ssh_hard_timeout>` so a
    hung SSH session cannot block the play, and builds argv by list
    concatenation instead of indexing ssh_opts[0..15] one by one.
  * `until` is evaluated once per loop item, and inside that expression the
    registered variable is the current item's result (it carries `rc`, not
    `.results`). The retry condition therefore checks `raid_cmds.rc` directly.
  * NOTE(review): line breaks, leading indentation and blank context lines
    were reconstructed from a whitespace-collapsed copy of this patch.
    Confirm them against check_raid.yml, or apply with
    `git apply --ignore-whitespace`.

 check_raid.yml | 50 ++++++++++++++++++--------------------------------
 1 file changed, 18 insertions(+), 32 deletions(-)

diff --git a/check_raid.yml b/check_raid.yml
index aff7a9b..30754aa 100644
--- a/check_raid.yml
+++ b/check_raid.yml
@@ -27,9 +27,12 @@
     raid_retries: "{{ RETRIES }}"
     raid_delay: 2
 
-    # SSH options (keeps same style as your working playbooks, but avoids auth prompts)
+    # Hard timeout for the whole SSH command (prevents hanging forever)
+    ssh_hard_timeout: 30
+
+    # SSH options (same style, but avoids auth prompts)
     ssh_opts:
-      - "-o"
+      - "-o" # English comments
       - "StrictHostKeyChecking=no"
       - "-o"
       - "UserKnownHostsFile=/dev/null"
@@ -60,10 +63,6 @@
           print(f"ERROR: cannot read /proc/mdstat: {e}")
           sys.exit(2)
 
-      # Find token like [UU] / [U_] for the selected md device
-      # Example lines:
-      # md0 : active raid1 sdb1[0] sdc1[1]
-      # 11718751232 blocks super 1.2 [2/2] [UU]
       pat = re.compile(
           rf"^{re.escape(md)}\s*:\s*active.*\n\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]",
           re.MULTILINE
@@ -76,8 +75,8 @@
 
       token = m.group(1)
       syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt))
-
       degraded = "_" in token
+
       print(f"RAID={md} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}")
 
       if degraded:
@@ -98,30 +97,16 @@
 
     - name: Run RAID check commands on VM (via SSH) # use SSHPASS env, hide item label
       ansible.builtin.command:
-        argv:
-          - sshpass
-          - -e
-          - ssh
-          - "{{ ssh_opts[0] }}"
-          - "{{ ssh_opts[1] }}"
-          - "{{ ssh_opts[2] }}"
-          - "{{ ssh_opts[3] }}"
-          - "{{ ssh_opts[4] }}"
-          - "{{ ssh_opts[5] }}"
-          - "{{ ssh_opts[6] }}"
-          - "{{ ssh_opts[7] }}"
-          - "{{ ssh_opts[8] }}"
-          - "{{ ssh_opts[9] }}"
-          - "{{ ssh_opts[10] }}"
-          - "{{ ssh_opts[11] }}"
-          - "{{ ssh_opts[12] }}"
-          - "{{ ssh_opts[13] }}"
-          - "{{ ssh_opts[14] }}"
-          - "{{ ssh_opts[15] }}"
-          - "{{ vm_user }}@{{ vm_ip }}"
-          - bash
-          - -lc
-          - "{{ ('sudo ' if use_sudo else '') + item }}"
+        argv: >-
+          {{
+            ['timeout', '-k', '5', (ssh_hard_timeout | string)]
+            + ['sshpass', '-e', 'ssh']
+            + ssh_opts
+            + [ vm_user ~ '@' ~ vm_ip,
+                'bash', '-lc',
+                ('sudo ' if use_sudo else '') + item
+              ]
+          }}
       environment:
         SSHPASS: "{{ vm_pass }}"
       loop: "{{ raid_commands }}"
@@ -134,7 +119,8 @@
       no_log: "{{ DEBUG == 0 }}"
       retries: "{{ raid_retries }}"
       delay: "{{ raid_delay }}"
-      until: raid_cmds is succeeded # command executed (rc can be non-zero; we handle later)
+      # Retry only on SSH/timeout failures (255=ssh error, 124=timeout); until is evaluated per loop item
+      until: (raid_cmds.rc | default(0)) not in [124, 255]
 
     - name: Show outputs for each RAID command
       ansible.builtin.debug: