This commit is contained in:
martin.fencl
2025-12-23 23:07:31 +01:00
parent eb9c56bb5e
commit e710669c84

View File

@@ -27,9 +27,12 @@
raid_retries: "{{ RETRIES }}"
raid_delay: 2
# SSH options (keeps same style as your working playbooks, but avoids auth prompts)
# Hard timeout for the whole SSH command (prevents hanging forever)
ssh_hard_timeout: 30
# SSH options (same style, but avoids auth prompts)
ssh_opts:
- "-o"
- "-o" # English comments
- "StrictHostKeyChecking=no"
- "-o"
- "UserKnownHostsFile=/dev/null"
@@ -60,10 +63,6 @@
print(f"ERROR: cannot read /proc/mdstat: {e}")
sys.exit(2)
# Find token like [UU] / [U_] for the selected md device
# Example lines:
# md0 : active raid1 sdb1[0] sdc1[1]
# 11718751232 blocks super 1.2 [2/2] [UU]
pat = re.compile(
rf"^{re.escape(md)}\s*:\s*active.*\n\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]",
re.MULTILINE
@@ -76,8 +75,8 @@
token = m.group(1)
syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt))
degraded = "_" in token
print(f"RAID={md} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}")
if degraded:
@@ -98,30 +97,16 @@
- name: Run RAID check commands on VM (via SSH) # use SSHPASS env, hide item label
ansible.builtin.command:
argv:
- sshpass
- -e
- ssh
- "{{ ssh_opts[0] }}"
- "{{ ssh_opts[1] }}"
- "{{ ssh_opts[2] }}"
- "{{ ssh_opts[3] }}"
- "{{ ssh_opts[4] }}"
- "{{ ssh_opts[5] }}"
- "{{ ssh_opts[6] }}"
- "{{ ssh_opts[7] }}"
- "{{ ssh_opts[8] }}"
- "{{ ssh_opts[9] }}"
- "{{ ssh_opts[10] }}"
- "{{ ssh_opts[11] }}"
- "{{ ssh_opts[12] }}"
- "{{ ssh_opts[13] }}"
- "{{ ssh_opts[14] }}"
- "{{ ssh_opts[15] }}"
- "{{ vm_user }}@{{ vm_ip }}"
- bash
- -lc
- "{{ ('sudo ' if use_sudo else '') + item }}"
argv: >-
{{
['timeout', '-k', '5', (ssh_hard_timeout | string)]
+ ['sshpass', '-e', 'ssh']
+ ssh_opts
+ [ vm_user ~ '@' ~ vm_ip,
'bash', '-lc',
('sudo ' if use_sudo else '') + item
]
}}
environment:
SSHPASS: "{{ vm_pass }}"
loop: "{{ raid_commands }}"
@@ -134,7 +119,8 @@
no_log: "{{ DEBUG == 0 }}"
retries: "{{ raid_retries }}"
delay: "{{ raid_delay }}"
until: raid_cmds is succeeded # command executed (rc can be non-zero; we handle later)
# Retry only on typical SSH/timeout failures:
#   255 = ssh itself failed, 124 = `timeout` expired,
#   137 = `timeout -k` had to escalate to SIGKILL (128 + 9).
# This task uses `loop`, and Ansible evaluates `until` once per item against
# that item's own result, so `raid_cmds` has no `.results` list at this point;
# the item's `rc` must be tested directly.
# `default(0)` keeps the condition from erroring if a result carries no `rc`
# (such a result is then treated as "do not retry").
until: (raid_cmds.rc | default(0)) not in [124, 137, 255]
- name: Show outputs for each RAID command
ansible.builtin.debug: