diff --git a/check_raid.yml b/check_raid.yml index 717e8df..7ef9e35 100644 --- a/check_raid.yml +++ b/check_raid.yml @@ -8,38 +8,32 @@ become_method: sudo vars: - # VM connection (provided by Semaphore env vars) vm_ip: "{{ lookup('env', 'VM_IP') }}" vm_user: "{{ lookup('env', 'VM_USER') }}" vm_pass: "{{ lookup('env', 'VM_PASS') }}" use_sudo: false - # --- Debug mode (controlled via Semaphore variable) --- DEBUG: "{{ lookup('env', 'DEBUG') | default(0) | int }}" RETRIES: "{{ lookup('env', 'RETRIES') | default(25) | int }}" - # --- RAID specifics --- - # RAID_MD can be: md0 / md1 / ... OR "auto" to check all arrays found in /proc/mdstat raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}" - # 1 = allow resync/recovery/reshape/check/repair; 0 = fail when such activity is detected raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}" - # 1 = do not fail when no MD arrays exist on the target raid_allow_no_array: "{{ lookup('env', 'RAID_ALLOW_NO_ARRAY') | default(0, true) | int }}" - # Retry policy raid_retries: "{{ RETRIES }}" raid_delay: 2 - - # Hard timeout for the whole SSH command (prevents hanging forever) ssh_hard_timeout: 30 - # SSH options (same style, but avoids auth prompts) ssh_opts: - "-o" # English comments - "StrictHostKeyChecking=no" - "-o" - "UserKnownHostsFile=/dev/null" - "-o" + - "GlobalKnownHostsFile=/dev/null" + - "-o" + - "LogLevel=ERROR" + - "-o" - "ConnectTimeout=15" - "-o" - "PreferredAuthentications=password" @@ -50,7 +44,6 @@ - "-o" - "NumberOfPasswordPrompts=1" - # Commands to run on the target VM raid_commands: - | python3 - <<'PY' @@ -67,30 +60,24 @@ print(f"ERROR: cannot read /proc/mdstat: {e}") sys.exit(2) - # Find all md arrays present - # We parse tokens like: [2/2] [UU] arrays = {} header_re = re.compile(r"^(md\d+)\s*:\s*active.*$", re.MULTILINE) token_re = re.compile(r"^\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]\s*$", re.MULTILINE) for m in header_re.finditer(txt): name = m.group(1) - start = m.end() - # Look ahead for the next token line after this header - chunk = txt[start:start + 3000] + chunk = txt[m.end():m.end() + 3000] tm = token_re.search(chunk) if tm: arrays[name] = tm.group(1) if not arrays: - msg = "NO_MD_ARRAYS: /proc/mdstat contains no active md arrays." - print(msg) + print("NO_MD_ARRAYS: /proc/mdstat contains no active md arrays.") print(txt.strip()) sys.exit(0 if allow_no_array else 2) syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt)) - # Decide which arrays to check if target == "auto": to_check = sorted(arrays.keys()) else: @@ -100,18 +87,14 @@ sys.exit(2) to_check = [target] - bad = [] + any_degraded = False for name in to_check: token = arrays[name] degraded = "_" in token - bad.append((name, token, degraded)) - - # Print summary - for name, token, degraded in bad: + any_degraded = any_degraded or degraded print(f"RAID={name} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}") - # Fail conditions - if any(degraded for _, _, degraded in bad): + if any_degraded: sys.exit(1) if syncing and not allow_sync: @@ -152,11 +135,11 @@ retries: "{{ raid_retries }}" delay: "{{ raid_delay }}" until: raid_cmds.rc not in [124, 255] + run_once: true - name: Show outputs for each RAID command ansible.builtin.debug: msg: | - CMD: {{ item.item | default('n/a') }} RC: {{ item.rc }} STDOUT: {{ (item.stdout | default('')).strip() }} @@ -164,6 +147,7 @@ {{ (item.stderr | default('')).strip() }} loop: "{{ (raid_cmds.results if (raid_cmds.results is defined) else [raid_cmds]) }}" when: DEBUG == 1 + run_once: true - name: Fail play if RAID check failed # English comments ansible.builtin.assert: @@ -174,3 +158,4 @@ loop_control: index_var: idx label: "cmd-{{ idx }}" + run_once: true