---
# check_raid.yml
#
# Reads /proc/mdstat on a remote VM over password-based SSH (sshpass) and fails
# the job when the selected md array is missing or degraded, and optionally
# when a rebuild/resync or a periodic check is running on it.
#
# Inputs (environment variables, e.g. provided by Semaphore):
#   VM_IP, VM_USER, VM_PASS  SSH target and credentials
#   DEBUG                    1 = print mdstat and the parsed values (default 0)
#   RAID_DEVICE              md0, md1, ... or /dev/md0 (default md0)
#   FAIL_ON_RESYNC           1 = fail while rebuilding/resyncing (default 1)
#   FAIL_ON_CHECK            1 = fail while a check is running (default 0)
- name: Check mdadm RAID status on VM (via SSH from Semaphore host)
  hosts: linux_servers
  gather_facts: false
  become: true
  become_user: root
  become_method: sudo

  vars:
    # VM connection (provided by Semaphore env vars)
    vm_ip: "{{ lookup('env', 'VM_IP') }}"
    vm_user: "{{ lookup('env', 'VM_USER') }}"
    vm_pass: "{{ lookup('env', 'VM_PASS') }}"
    use_sudo: false

    # lookup('env', ...) yields an empty string (not "undefined") when the
    # variable is unset, so default() needs its second argument set to true
    # for the fallback value to be used.

    # Debug / behavior toggles (controlled via Semaphore variables)
    DEBUG: "{{ lookup('env', 'DEBUG') | default(0, true) | int }}"

    # Which md device to check (md0, md1, ... or /dev/md0)
    RAID_DEVICE: "{{ lookup('env', 'RAID_DEVICE') | default('md0', true) }}"

    # Fail the job if rebuild/resync is in progress (recommended)
    FAIL_ON_RESYNC: "{{ lookup('env', 'FAIL_ON_RESYNC') | default(1, true) | int }}"

    # Fail the job if a periodic "check" is running (usually set 0 to avoid noise)
    FAIL_ON_CHECK: "{{ lookup('env', 'FAIL_ON_CHECK') | default(0, true) | int }}"

  tasks:
    # sshpass supplies the password to ssh non-interactively.
    - name: Ensure sshpass is installed (for password-based SSH)
      ansible.builtin.apt:
        name: sshpass
        state: present
        update_cache: true
        cache_valid_time: 86400

    # The remote command is passed as ONE argument: ssh joins all trailing
    # arguments with spaces before handing them to the remote shell, so
    # "bash -lc" followed by a separate "cat /proc/mdstat" item would run
    # `cat` without a file name.
    # failed_when is disabled so the next task can report a readable error.
    # NOTE(review): StrictHostKeyChecking=no disables host key verification;
    # consider accept-new or a managed known_hosts file.
    - name: Read /proc/mdstat from VM (via SSH)
      ansible.builtin.command:
        argv:
          - sshpass
          - -e
          - ssh
          - -o
          - StrictHostKeyChecking=no
          - -o
          - ConnectTimeout=15
          - "{{ vm_user }}@{{ vm_ip }}"
          - "{{ ('sudo ' if use_sudo else '') + 'cat /proc/mdstat' }}"
      environment:
        SSHPASS: "{{ vm_pass }}"
      register: mdstat_cmd
      changed_when: false
      failed_when: false
      # Hides stdout/stderr in normal mode; the asserts below still report a
      # summary. "| int" keeps the comparison numeric even when the templated
      # value arrives as a string.
      no_log: "{{ DEBUG | int == 0 }}"

    - name: Fail if we cannot read /proc/mdstat (SSH/auth/network)
      ansible.builtin.assert:
        that:
          - mdstat_cmd.rc == 0
        fail_msg: >-
          Cannot read /proc/mdstat from {{ vm_ip }} (rc={{ mdstat_cmd.rc }}).
          stderr={{ (mdstat_cmd.stderr | default('') | trim) }}
        success_msg: "Successfully read /proc/mdstat from {{ vm_ip }}."

    # Intermediate values live in task-level vars because facts created by a
    # set_fact task are not visible to the other arguments of that same task.
    #
    # All regular expressions are written inside block scalars: there YAML
    # leaves backslashes alone and the Jinja string literal turns "\\b" into
    # the regex escape "\b". Inside a YAML double-quoted string "\\b" would
    # first collapse to "\b", which Jinja then decodes as a backspace.
    #
    # regex_search() with a group argument returns a list of groups (or
    # nothing when there is no match); "default([], true) | join('')" turns
    # either outcome into a plain string.
    - name: Parse RAID status from mdstat
      vars:
        _md_name: "{{ RAID_DEVICE | regex_replace('^/dev/', '') }}"
        _mdstat: "{{ mdstat_cmd.stdout | default('') }}"
        _md_actions: 'resync|recovery|reshape|repair'
        # The array's own stanza: its "mdX : ..." header line plus the
        # indented lines that follow, up to the first blank line. Matching
        # only inside it keeps other arrays from influencing the result.
        _md_stanza: >-
          {{ _mdstat
          | regex_search('(?m)^' ~ (_md_name | regex_escape) ~ '[ \\t]*:.*(?:\\n[ \\t]+\\S.*)*')
          | default('', true) }}
        # Member map from "[n/m] [UU_]": U = member up, _ = member missing.
        _md_status: >-
          {{ _md_stanza
          | regex_search('\\[[0-9]+/[0-9]+\\]\\s*\\[([U_]+)\\]', '\\1')
          | default([], true) | join('') }}
      ansible.builtin.set_fact:
        raid_md: "{{ _md_name }}"
        mdstat_text: "{{ _mdstat }}"
        raid_present: "{{ _md_stanza | length > 0 }}"
        raid_status: "{{ _md_status }}"
        raid_is_degraded: "{{ '_' in _md_status }}"
        raid_is_rebuilding: >-
          {{ _md_stanza is search('(?i)\\b(?:' ~ _md_actions ~ ')\\b') }}
        raid_is_checking: >-
          {{ _md_stanza is search('(?i)\\bcheck\\b') }}
        # Progress-bar line of the running action, without leading indent.
        raid_action_line: >-
          {{ _md_stanza
          | regex_search('(?im)^[ \\t]*(\\[[^\\]]+\\].*\\b(?:' ~ _md_actions ~ '|check)\\b.*)$', '\\1')
          | default([], true) | join('') }}
        # Percentage reported after "<action> =", without the "%" sign.
        raid_progress: >-
          {{ _md_stanza
          | regex_search('(?i)\\b(?:' ~ _md_actions ~ '|check)\\b\\s*=\\s*([0-9.]+)%', '\\1')
          | default([], true) | join('') }}

    - name: Debug | Show mdstat and parsed values
      ansible.builtin.debug:
        msg: |
          --- /proc/mdstat ---
          {{ mdstat_text }}
          --- Parsed ---
          raid_md={{ raid_md }}
          raid_present={{ raid_present }}
          raid_status={{ raid_status }}
          raid_is_degraded={{ raid_is_degraded }}
          raid_is_rebuilding={{ raid_is_rebuilding }}
          raid_is_checking={{ raid_is_checking }}
          raid_progress={{ raid_progress }}
          raid_action_line={{ raid_action_line }}
      when: DEBUG | int == 1

    - name: Fail if RAID device is not present
      ansible.builtin.assert:
        that:
          - raid_present
        fail_msg: "RAID {{ raid_md }} not found in /proc/mdstat on {{ vm_ip }}."
        success_msg: "RAID {{ raid_md }} found in /proc/mdstat."

    - name: Fail if RAID is degraded (missing member)
      ansible.builtin.assert:
        that:
          - not raid_is_degraded
        fail_msg: >-
          RAID {{ raid_md }} is DEGRADED: status={{ raid_status }}.
          mdstat excerpt={{ (mdstat_text | trim) }}
        success_msg: "RAID {{ raid_md }} is OK: status={{ raid_status }}."

    - name: Fail if RAID rebuild/resync is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_rebuilding
        fail_msg: >-
          RAID {{ raid_md }} is rebuilding/resyncing.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | string | length) > 0 else '' }}
          line={{ raid_action_line | default('n/a', true) }}
        success_msg: "RAID {{ raid_md }} is not rebuilding/resyncing."
      when: FAIL_ON_RESYNC | int == 1

    - name: Fail if RAID check is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_checking
        fail_msg: >-
          RAID {{ raid_md }} is running a periodic check.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | string | length) > 0 else '' }}
          line={{ raid_action_line | default('n/a', true) }}
        success_msg: "RAID {{ raid_md }} is not running a periodic check."
      when: FAIL_ON_CHECK | int == 1