---
# check_raid.yml
#
# Reads /proc/mdstat from a VM (password SSH, executed on the proxmox_nextcloud
# host) and fails the job when the selected md array is missing or degraded,
# and optionally when a rebuild/resync or a periodic check is running.
#
# Inputs (environment variables, e.g. provided by Semaphore):
#   VM_IP, VM_USER, VM_PASS  - SSH target and credentials (required)
#   RAID_DEVICE              - md0, md1, ... or /dev/md0 (default: md0)
#   DEBUG                    - 1 to print mdstat and parsed values (default: 0)
#   FAIL_ON_RESYNC           - 1 to fail during rebuild/resync (default: 1)
#   FAIL_ON_CHECK            - 1 to fail during a periodic check (default: 0)

- name: Check mdadm RAID status on VM (via SSH from proxmox_nextcloud)
  hosts: proxmox_nextcloud
  gather_facts: false
  become: true
  become_user: root
  become_method: sudo

  vars:
    # VM connection (provided by Semaphore env vars)
    vm_ip: "{{ lookup('env', 'VM_IP') }}"
    vm_user: "{{ lookup('env', 'VM_USER') }}"
    vm_pass: "{{ lookup('env', 'VM_PASS') }}"
    # Not referenced by any task visible in this play.
    use_sudo: false

    # The env lookup yields an empty string for an unset variable, so every
    # default below uses default(..., true) to also replace empty values.

    # Debug / behavior toggles (controlled via Semaphore variables)
    DEBUG: "{{ lookup('env', 'DEBUG') | default(0, true) | int }}"

    # Which md device to check (md0, md1, ... or /dev/md0)
    RAID_DEVICE: "{{ lookup('env', 'RAID_DEVICE') | default('md0', true) }}"

    # Fail the job if rebuild/resync is in progress (recommended)
    FAIL_ON_RESYNC: "{{ lookup('env', 'FAIL_ON_RESYNC') | default(1, true) | int }}"

    # Fail the job if a periodic "check" is running (usually set 0 to avoid noise)
    FAIL_ON_CHECK: "{{ lookup('env', 'FAIL_ON_CHECK') | default(0, true) | int }}"

  tasks:
    # Stop before touching the host when the SSH target is not configured.
    - name: Fail early if VM connection settings are missing
      ansible.builtin.assert:
        that:
          - vm_ip | length > 0
          - vm_user | length > 0
          - vm_pass | length > 0
        fail_msg: "VM_IP, VM_USER and VM_PASS must all be set in the environment."
        quiet: true

    - name: Ensure sshpass is installed (for password-based SSH)
      ansible.builtin.apt:
        name: sshpass
        state: present
        update_cache: true
        cache_valid_time: 86400

    # The password is handed to sshpass through the SSHPASS environment
    # variable (-e) so it never appears on the command line.
    # NOTE(review): StrictHostKeyChecking=no together with
    # UserKnownHostsFile=/dev/null disables host key verification while a
    # password is being sent; consider pinning the VM host key instead.
    - name: Read /proc/mdstat from VM (via SSH)
      ansible.builtin.command:
        argv:
          - timeout
          - 25s
          - sshpass
          - -e
          - ssh
          - -o
          - StrictHostKeyChecking=no
          - -o
          - UserKnownHostsFile=/dev/null
          - -o
          - ConnectTimeout=10
          - -o
          - ConnectionAttempts=1
          - -o
          - NumberOfPasswordPrompts=1
          - -o
          - PubkeyAuthentication=no
          - -o
          - GSSAPIAuthentication=no
          - -o
          - PasswordAuthentication=yes
          - -o
          - ServerAliveInterval=5
          - -o
          - ServerAliveCountMax=2
          - "{{ vm_user }}@{{ vm_ip }}"
          - "cat /proc/mdstat"
      environment:
        SSHPASS: "{{ vm_pass }}"
      register: mdstat_cmd
      changed_when: false
      # Never fail here; the next task reports a readable error instead.
      failed_when: false
      # Cast explicitly: without native Jinja types DEBUG is the string "0".
      no_log: "{{ DEBUG | int == 0 }}"

    - name: Fail if we cannot read /proc/mdstat (SSH/auth/network)
      ansible.builtin.assert:
        that:
          - mdstat_cmd.rc == 0
        fail_msg: >-
          Cannot read /proc/mdstat from {{ vm_ip }} (rc={{ mdstat_cmd.rc }}).
          stderr={{ (mdstat_cmd.stderr | default('') | trim) }}
        success_msg: "Successfully read /proc/mdstat from {{ vm_ip }}."

    - name: Build base variables
      ansible.builtin.set_fact:
        # Accept both "md0" and "/dev/md0".
        raid_md: "{{ RAID_DEVICE | regex_replace('^/dev/', '') }}"
        mdstat_text: "{{ mdstat_cmd.stdout | default('') }}"

    # Line-oriented scan: the block starts at the unindented "<md> :" line and
    # continues over the indented lines that follow it. Splitting on blank
    # lines is not reliable because the first array directly follows the
    # "Personalities" line and the separator line may contain spaces.
    - name: Extract selected md block from mdstat (no regex)
      ansible.builtin.set_fact:
        raid_block: >-
          {%- set ns = namespace(lines=[], capturing=false, done=false) -%}
          {%- for line in mdstat_text.splitlines() -%}
          {%- if not ns.done -%}
          {%- if ns.capturing -%}
          {%- if (line | trim | length) > 0 and line[:1] in [' ', '\t'] -%}
          {%- set ns.lines = ns.lines + [line] -%}
          {%- else -%}
          {%- set ns.done = true -%}
          {%- endif -%}
          {%- elif line.startswith(raid_md ~ ' :') or line.startswith(raid_md ~ ':') -%}
          {%- set ns.capturing = true -%}
          {%- set ns.lines = [line] -%}
          {%- endif -%}
          {%- endif -%}
          {%- endfor -%}
          {{ ns.lines | join('\n') | trim }}

    # Facts set by one set_fact task are not visible to the other keys of the
    # same task, so parsing is split: this task depends only on raid_block.
    - name: Parse RAID status from mdstat (no regex)
      ansible.builtin.set_fact:
        raid_present: "{{ (raid_block | trim | length) > 0 }}"

        # Extract [UU] / [U_] token by scanning bracket tokens and keeping
        # only those made up entirely of U/_ chars
        raid_status: >-
          {%- set ns = namespace(st='') -%}
          {%- for line in (raid_block.splitlines() if (raid_block | length) > 0 else []) -%}
          {%- for tok in line.split() -%}
          {%- if tok.startswith('[') and tok.endswith(']') -%}
          {%- set inner = tok[1:-1] -%}
          {%- if (inner | length) > 0 and ((inner | list | difference(['U','_'])) | length == 0) -%}
          {%- set ns.st = inner -%}
          {%- endif -%}
          {%- endif -%}
          {%- endfor -%}
          {%- endfor -%}
          {{ ns.st }}

        raid_block_lower: "{{ raid_block | lower }}"

        # First line that contains an action keyword
        raid_action_line: >-
          {%- set ns = namespace(line='') -%}
          {%- set keys = ['resync','recovery','reshape','repair','check'] -%}
          {%- for line in (raid_block.splitlines() if (raid_block | length) > 0 else []) -%}
          {%- set l = (line | lower) -%}
          {%- for k in keys -%}
          {%- if ns.line == '' and (k in l) -%}
          {%- set ns.line = (line | trim) -%}
          {%- endif -%}
          {%- endfor -%}
          {%- endfor -%}
          {{ ns.line }}

    # Second parsing step: everything here reads facts set by the task above.
    - name: Derive RAID state flags and progress
      ansible.builtin.set_fact:
        # A "_" in the status token marks a missing member.
        raid_is_degraded: "{{ (raid_status | length > 0) and ('_' in raid_status) }}"

        raid_is_rebuilding: >-
          {{
            ('resync' in raid_block_lower)
            or ('recovery' in raid_block_lower)
            or ('reshape' in raid_block_lower)
            or ('repair' in raid_block_lower)
          }}

        raid_is_checking: "{{ 'check' in raid_block_lower }}"

        # Parse progress from ".... = 12.3%" if present
        raid_progress: >-
          {%- set ns = namespace(p='') -%}
          {%- if (raid_action_line | length) > 0 and ('=' in raid_action_line) and ('%' in raid_action_line) -%}
          {%- set right = raid_action_line.split('=', 1)[1] -%}
          {%- set ns.p = (right.split('%', 1)[0] | trim) -%}
          {%- endif -%}
          {{ ns.p }}

    - name: Debug | Show mdstat and parsed values
      ansible.builtin.debug:
        msg: |
          --- /proc/mdstat ---
          {{ mdstat_text }}
          --- md block ({{ raid_md }}) ---
          {{ raid_block }}
          --- Parsed ---
          raid_present={{ raid_present }}
          raid_status={{ raid_status }}
          raid_is_degraded={{ raid_is_degraded }}
          raid_is_rebuilding={{ raid_is_rebuilding }}
          raid_is_checking={{ raid_is_checking }}
          raid_progress={{ raid_progress }}
          raid_action_line={{ raid_action_line }}
      when: DEBUG | int == 1

    - name: Fail if RAID device is not present
      ansible.builtin.assert:
        that:
          - raid_present
        fail_msg: "RAID {{ raid_md }} not found in /proc/mdstat on VM {{ vm_ip }}."
        success_msg: "RAID {{ raid_md }} found in /proc/mdstat."

    - name: Fail if RAID is degraded (missing member)
      ansible.builtin.assert:
        that:
          - not raid_is_degraded
        fail_msg: >-
          RAID {{ raid_md }} is DEGRADED: status={{ raid_status }}.
          mdstat excerpt={{ (raid_block | trim) }}
        success_msg: "RAID {{ raid_md }} is OK: status={{ raid_status }}."

    - name: Fail if RAID rebuild/resync is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_rebuilding
        fail_msg: >-
          RAID {{ raid_md }} is rebuilding/resyncing.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | length) > 0 else '' }}
          line={{ (raid_action_line | default('n/a', true)) }}
        success_msg: "RAID {{ raid_md }} is not rebuilding/resyncing."
      when: FAIL_ON_RESYNC | int == 1

    - name: Fail if RAID check is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_checking
        fail_msg: >-
          RAID {{ raid_md }} is running a periodic check.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | length) > 0 else '' }}
          line={{ (raid_action_line | default('n/a', true)) }}
        success_msg: "RAID {{ raid_md }} is not running a periodic check."
      when: FAIL_ON_CHECK | int == 1