forked from jakub/ansible
redo
This commit is contained in:
@@ -19,9 +19,12 @@
|
|||||||
RETRIES: "{{ lookup('env', 'RETRIES') | default(25) | int }}"
|
RETRIES: "{{ lookup('env', 'RETRIES') | default(25) | int }}"
|
||||||
|
|
||||||
# --- RAID specifics ---
|
# --- RAID specifics ---
|
||||||
|
# RAID_MD can be: md0 / md1 / ... OR "auto" to check all arrays found in /proc/mdstat
|
||||||
raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}"
|
raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}"
|
||||||
# 1 = allow resync/recovery/reshape/check/repair; 0 = fail when such activity is detected
|
# 1 = allow resync/recovery/reshape/check/repair; 0 = fail when such activity is detected
|
||||||
raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}"
|
raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}"
|
||||||
|
# 1 = do not fail when no MD arrays exist on the target; 0 (default) = fail in that case
|
||||||
|
raid_allow_no_array: "{{ lookup('env', 'RAID_ALLOW_NO_ARRAY') | default(0, true) | int }}"
|
||||||
|
|
||||||
# Retry policy
|
# Retry policy
|
||||||
raid_retries: "{{ RETRIES }}"
|
raid_retries: "{{ RETRIES }}"
|
||||||
@@ -54,8 +57,9 @@
|
|||||||
# Parse /proc/mdstat and validate MD RAID state
|
# Parse /proc/mdstat and validate MD RAID state
|
||||||
import re, sys
|
import re, sys
|
||||||
|
|
||||||
md = "{{ raid_md_device }}"
|
target = "{{ raid_md_device }}"
|
||||||
allow_sync = int("{{ raid_allow_sync }}")
|
allow_sync = int("{{ raid_allow_sync }}")
|
||||||
|
allow_no_array = int("{{ raid_allow_no_array }}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
txt = open("/proc/mdstat", "r", encoding="utf-8", errors="ignore").read()
|
txt = open("/proc/mdstat", "r", encoding="utf-8", errors="ignore").read()
|
||||||
@@ -63,23 +67,51 @@
|
|||||||
print(f"ERROR: cannot read /proc/mdstat: {e}")
|
print(f"ERROR: cannot read /proc/mdstat: {e}")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
pat = re.compile(
|
# Find all active md arrays present (headers not marked "active" are not matched)
|
||||||
rf"^{re.escape(md)}\s*:\s*active.*\n\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]",
|
# We parse tokens like: [2/2] [UU]
|
||||||
re.MULTILINE
|
arrays = {}
|
||||||
)
|
header_re = re.compile(r"^(md\d+)\s*:\s*active.*$", re.MULTILINE)
|
||||||
m = pat.search(txt)
|
token_re = re.compile(r"^\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]\s*$", re.MULTILINE)
|
||||||
if not m:
|
|
||||||
print(f"ERROR: {md} not found in /proc/mdstat")
|
for m in header_re.finditer(txt):
|
||||||
|
name = m.group(1)
|
||||||
|
start = m.end()
|
||||||
|
# Look ahead for the next token line after this header (search limited to the next 3000 characters)
|
||||||
|
chunk = txt[start:start + 3000]
|
||||||
|
tm = token_re.search(chunk)
|
||||||
|
if tm:
|
||||||
|
arrays[name] = tm.group(1)
|
||||||
|
|
||||||
|
if not arrays:
|
||||||
|
msg = "NO_MD_ARRAYS: /proc/mdstat contains no active md arrays."
|
||||||
|
print(msg)
|
||||||
|
print(txt.strip())
|
||||||
|
sys.exit(0 if allow_no_array else 2)
|
||||||
|
|
||||||
|
syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt))
|
||||||
|
|
||||||
|
# Decide which arrays to check
|
||||||
|
if target == "auto":
|
||||||
|
to_check = sorted(arrays.keys())
|
||||||
|
else:
|
||||||
|
if target not in arrays:
|
||||||
|
print(f"ERROR: {target} not found in /proc/mdstat. Found={sorted(arrays.keys())}")
|
||||||
print(txt.strip())
|
print(txt.strip())
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
to_check = [target]
|
||||||
|
|
||||||
token = m.group(1)
|
bad = []
|
||||||
syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt))
|
for name in to_check:
|
||||||
|
token = arrays[name]
|
||||||
degraded = "_" in token
|
degraded = "_" in token
|
||||||
|
bad.append((name, token, degraded))
|
||||||
|
|
||||||
print(f"RAID={md} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}")
|
# Print summary
|
||||||
|
for name, token, degraded in bad:
|
||||||
|
print(f"RAID={name} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}")
|
||||||
|
|
||||||
if degraded:
|
# Fail conditions
|
||||||
|
if any(degraded for _, _, degraded in bad):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
if syncing and not allow_sync:
|
if syncing and not allow_sync:
|
||||||
@@ -115,11 +147,10 @@
|
|||||||
label: "cmd-{{ idx }}"
|
label: "cmd-{{ idx }}"
|
||||||
register: raid_cmds
|
register: raid_cmds
|
||||||
changed_when: false
|
changed_when: false
|
||||||
failed_when: false # do not stop early; assert below decides
|
failed_when: false
|
||||||
no_log: "{{ DEBUG == 0 }}"
|
no_log: "{{ DEBUG == 0 }}"
|
||||||
retries: "{{ raid_retries }}"
|
retries: "{{ raid_retries }}"
|
||||||
delay: "{{ raid_delay }}"
|
delay: "{{ raid_delay }}"
|
||||||
# Retry only on typical SSH/timeout failures (255=ssh error, 124=timeout)
|
|
||||||
until: raid_cmds.rc not in [124, 255]
|
until: raid_cmds.rc not in [124, 255]
|
||||||
|
|
||||||
- name: Show outputs for each RAID command
|
- name: Show outputs for each RAID command
|
||||||
|
|||||||
Reference in New Issue
Block a user