This commit is contained in:
martin.fencl
2025-12-23 23:34:57 +01:00
parent 74c3ef8945
commit 4038f5b6a1

View File

@@ -8,38 +8,32 @@
become_method: sudo become_method: sudo
vars: vars:
# VM connection (provided by Semaphore env vars)
vm_ip: "{{ lookup('env', 'VM_IP') }}" vm_ip: "{{ lookup('env', 'VM_IP') }}"
vm_user: "{{ lookup('env', 'VM_USER') }}" vm_user: "{{ lookup('env', 'VM_USER') }}"
vm_pass: "{{ lookup('env', 'VM_PASS') }}" vm_pass: "{{ lookup('env', 'VM_PASS') }}"
use_sudo: false use_sudo: false
# --- Debug mode (controlled via Semaphore variable) ---
DEBUG: "{{ lookup('env', 'DEBUG') | default(0) | int }}" DEBUG: "{{ lookup('env', 'DEBUG') | default(0) | int }}"
RETRIES: "{{ lookup('env', 'RETRIES') | default(25) | int }}" RETRIES: "{{ lookup('env', 'RETRIES') | default(25) | int }}"
# --- RAID specifics ---
# RAID_MD can be: md0 / md1 / ... OR "auto" to check all arrays found in /proc/mdstat
raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}" raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}"
# 1 = allow resync/recovery/reshape/check/repair; 0 = fail when such activity is detected
raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}" raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}"
# 1 = do not fail when no MD arrays exist on the target
raid_allow_no_array: "{{ lookup('env', 'RAID_ALLOW_NO_ARRAY') | default(0, true) | int }}" raid_allow_no_array: "{{ lookup('env', 'RAID_ALLOW_NO_ARRAY') | default(0, true) | int }}"
# Retry policy
raid_retries: "{{ RETRIES }}" raid_retries: "{{ RETRIES }}"
raid_delay: 2 raid_delay: 2
# Hard timeout for the whole SSH command (prevents hanging forever)
ssh_hard_timeout: 30 ssh_hard_timeout: 30
# SSH options (same style, but avoids auth prompts)
ssh_opts: ssh_opts:
- "-o" # English comments - "-o" # English comments
- "StrictHostKeyChecking=no" - "StrictHostKeyChecking=no"
- "-o" - "-o"
- "UserKnownHostsFile=/dev/null" - "UserKnownHostsFile=/dev/null"
- "-o" - "-o"
- "GlobalKnownHostsFile=/dev/null"
- "-o"
- "LogLevel=ERROR"
- "-o"
- "ConnectTimeout=15" - "ConnectTimeout=15"
- "-o" - "-o"
- "PreferredAuthentications=password" - "PreferredAuthentications=password"
@@ -50,7 +44,6 @@
- "-o" - "-o"
- "NumberOfPasswordPrompts=1" - "NumberOfPasswordPrompts=1"
# Commands to run on the target VM
raid_commands: raid_commands:
- | - |
python3 - <<'PY' python3 - <<'PY'
@@ -67,30 +60,24 @@
print(f"ERROR: cannot read /proc/mdstat: {e}") print(f"ERROR: cannot read /proc/mdstat: {e}")
sys.exit(2) sys.exit(2)
# Find all md arrays present
# We parse tokens like: [2/2] [UU]
arrays = {} arrays = {}
header_re = re.compile(r"^(md\d+)\s*:\s*active.*$", re.MULTILINE) header_re = re.compile(r"^(md\d+)\s*:\s*active.*$", re.MULTILINE)
token_re = re.compile(r"^\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]\s*$", re.MULTILINE) token_re = re.compile(r"^\s*\d+\s+blocks.*\[\d+/\d+\]\s*\[([U_]+)\]\s*$", re.MULTILINE)
for m in header_re.finditer(txt): for m in header_re.finditer(txt):
name = m.group(1) name = m.group(1)
start = m.end() chunk = txt[m.end():m.end() + 3000]
# Look ahead for the next token line after this header
chunk = txt[start:start + 3000]
tm = token_re.search(chunk) tm = token_re.search(chunk)
if tm: if tm:
arrays[name] = tm.group(1) arrays[name] = tm.group(1)
if not arrays: if not arrays:
msg = "NO_MD_ARRAYS: /proc/mdstat contains no active md arrays." print("NO_MD_ARRAYS: /proc/mdstat contains no active md arrays.")
print(msg)
print(txt.strip()) print(txt.strip())
sys.exit(0 if allow_no_array else 2) sys.exit(0 if allow_no_array else 2)
syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt)) syncing = bool(re.search(r"\b(resync|recovery|reshape|check|repair)\b", txt))
# Decide which arrays to check
if target == "auto": if target == "auto":
to_check = sorted(arrays.keys()) to_check = sorted(arrays.keys())
else: else:
@@ -100,18 +87,14 @@
sys.exit(2) sys.exit(2)
to_check = [target] to_check = [target]
bad = [] any_degraded = False
for name in to_check: for name in to_check:
token = arrays[name] token = arrays[name]
degraded = "_" in token degraded = "_" in token
bad.append((name, token, degraded)) any_degraded = any_degraded or degraded
# Print summary
for name, token, degraded in bad:
print(f"RAID={name} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}") print(f"RAID={name} token=[{token}] degraded={degraded} syncing={syncing} allow_sync={allow_sync}")
# Fail conditions if any_degraded:
if any(degraded for _, _, degraded in bad):
sys.exit(1) sys.exit(1)
if syncing and not allow_sync: if syncing and not allow_sync:
@@ -152,11 +135,11 @@
retries: "{{ raid_retries }}" retries: "{{ raid_retries }}"
delay: "{{ raid_delay }}" delay: "{{ raid_delay }}"
until: raid_cmds.rc not in [124, 255] until: raid_cmds.rc not in [124, 255]
run_once: true
- name: Show outputs for each RAID command - name: Show outputs for each RAID command
ansible.builtin.debug: ansible.builtin.debug:
msg: | msg: |
CMD: {{ item.item | default('n/a') }}
RC: {{ item.rc }} RC: {{ item.rc }}
STDOUT: STDOUT:
{{ (item.stdout | default('')).strip() }} {{ (item.stdout | default('')).strip() }}
@@ -164,6 +147,7 @@
{{ (item.stderr | default('')).strip() }} {{ (item.stderr | default('')).strip() }}
loop: "{{ (raid_cmds.results if (raid_cmds.results is defined) else [raid_cmds]) }}" loop: "{{ (raid_cmds.results if (raid_cmds.results is defined) else [raid_cmds]) }}"
when: DEBUG == 1 when: DEBUG == 1
run_once: true
- name: Fail play if RAID check failed # English comments - name: Fail play if RAID check failed # English comments
ansible.builtin.assert: ansible.builtin.assert:
@@ -174,3 +158,4 @@
loop_control: loop_control:
index_var: idx index_var: idx
label: "cmd-{{ idx }}" label: "cmd-{{ idx }}"
run_once: true