---
# check_raid.yml
#
# Reads /proc/mdstat from a VM over password-based SSH (sshpass + ssh, run on
# every host in `linux_servers`) and fails the play when the selected MD RAID
# device is missing, degraded, or - optionally - running a sync operation.
#
# Inputs (environment variables, read through lookup('env', ...)):
#   VM_IP, VM_USER, VM_PASS  SSH target and credentials of the VM
#   RAID_MD                  MD device name as shown in /proc/mdstat (default md0)
#   RAID_ALLOW_SYNC          1 = tolerate sync operations (default), 0 = fail on them
#   DEBUG                    1 = print raw mdstat and a summary (default 0)
#   RETRIES                  retry count of the SSH read (default 10)

- name: Check Linux MD RAID health on VM via Proxmox
  hosts: linux_servers
  gather_facts: false
  become: true
  become_user: root
  become_method: sudo

  vars:
    # VM connection (provided by Semaphore env vars)
    vm_ip: "{{ lookup('env', 'VM_IP') }}"
    vm_user: "{{ lookup('env', 'VM_USER') }}"
    vm_pass: "{{ lookup('env', 'VM_PASS') }}"
    use_sudo: false

    # Debug / retries.
    # The env lookup yields an empty string (not "undefined") for an unset
    # variable, so default() needs its second argument set to true to apply;
    # without it RETRIES silently became 0.
    DEBUG: "{{ lookup('env', 'DEBUG') | default(0, true) | int }}"
    RETRIES: "{{ lookup('env', 'RETRIES') | default(10, true) | int }}"

    # RAID device to check (e.g. md0, md1...)
    raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) }}"

    # If 0 => fail when resync/recovery/reshape/check/repair is detected in /proc/mdstat
    # If 1 => allow sync operations (still fails on degraded [U_], [_U], etc.)
    raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}"

    # SSH options
    ssh_connect_timeout: 15

  tasks:
    # Fail fast with a clear message instead of retrying SSH against "@".
    - name: Validate VM connection settings
      ansible.builtin.assert:
        that:
          - vm_ip | string | length > 0
          - vm_user | string | length > 0
        fail_msg: "VM_IP and VM_USER must be set in the environment."
        quiet: true

    - name: Ensure sshpass is installed (for password-based SSH)
      ansible.builtin.apt:
        name: sshpass
        state: present
        update_cache: true

    - name: Read /proc/mdstat from VM (via SSH)
      ansible.builtin.command:
        argv:
          - sshpass
          - "-e"  # take the password from $SSHPASS, keeping it off the command line
          - ssh
          - "-o"
          # NOTE(review): disables host key verification; consider
          # StrictHostKeyChecking=accept-new or a managed known_hosts file.
          - StrictHostKeyChecking=no
          - "-o"
          - "ConnectTimeout={{ ssh_connect_timeout }}"
          - "{{ vm_user }}@{{ vm_ip }}"
          # ssh joins all trailing arguments with spaces before handing them to
          # the remote shell, so the whole remote command must be ONE argument
          # with its own quoting. Passed as separate items, the remote side runs
          # `bash -lc cat /proc/mdstat`, i.e. a bare `cat` (or a bare `sudo`).
          - "bash -lc '{{ ('sudo ' if (use_sudo | bool) else '') ~ 'cat /proc/mdstat' }}'"
      environment:
        SSHPASS: "{{ vm_pass }}"
      register: mdstat_raw
      changed_when: false
      # Templated vars can arrive as strings; cast before comparing to an int.
      no_log: "{{ (DEBUG | int) == 0 }}"
      retries: "{{ RETRIES | int }}"
      delay: 2
      until: mdstat_raw.rc == 0

    - name: Debug | Show raw /proc/mdstat
      ansible.builtin.debug:
        msg: "{{ mdstat_raw.stdout }}"
      when: (DEBUG | int) == 1

    # Matches the device header line plus the following "blocks" line, e.g.
    #   md0 : active raid1 sdb1[1] sda1[0]
    #         976630336 blocks super 1.2 [2/2] [UU]
    # and captures the member-state token ("UU", "U_", ...).
    # regex_search with a group argument returns a LIST of groups (or nothing
    # when there is no match), so the single capture is unwrapped with `first`;
    # otherwise the later `'_' in raid_token` test is list membership and a
    # degraded array is never detected.
    - name: Extract RAID status token for selected MD device
      ansible.builtin.set_fact:
        raid_token: >-
          {{
            mdstat_raw.stdout
            | regex_search(
                '^' ~ (raid_md_device | regex_escape)
                ~ '\\s*:\\s*active.*\\n\\s*\\d+\\s+blocks.*\\[[0-9]+/[0-9]+\\]\\s*\\[([U_]+)\\]',
                '\\1',
                multiline=True
              )
            | default([], true)
            | first
            | default('', true)
          }}

    # The search covers the whole mdstat output, so a sync operation on any
    # array (not only the selected device) sets this flag.
    - name: Detect sync operations in mdstat (resync/recovery/reshape/check/repair)
      ansible.builtin.set_fact:
        raid_syncing: "{{ (mdstat_raw.stdout is search('resync|recovery|reshape|check|repair')) | bool }}"

    - name: Compute degraded flag (underscore means missing member)
      ansible.builtin.set_fact:
        raid_degraded: "{{ (raid_token | length > 0) and ('_' in raid_token) }}"

    - name: Fail if MD device not found in /proc/mdstat
      ansible.builtin.assert:
        that:
          - raid_token | length > 0
        fail_msg: "RAID device {{ raid_md_device }} was not found in /proc/mdstat on VM ({{ vm_ip }})."
        success_msg: "RAID device {{ raid_md_device }} found in /proc/mdstat."

    - name: Fail if RAID is degraded (token contains '_')
      ansible.builtin.assert:
        that:
          - not (raid_degraded | bool)
        fail_msg: "RAID {{ raid_md_device }} is DEGRADED: token=[{{ raid_token }}] (expected all 'U')."
        success_msg: "RAID {{ raid_md_device }} is OK: token=[{{ raid_token }}]."

    - name: Fail if RAID is syncing and syncing is not allowed
      ansible.builtin.assert:
        that:
          - (raid_allow_sync | int) == 1 or (not (raid_syncing | bool))
        fail_msg: "RAID {{ raid_md_device }} is running a sync operation (resync/recovery/reshape/check/repair) and RAID_ALLOW_SYNC=0."
        success_msg: "No sync operation detected (or RAID_ALLOW_SYNC=1)."

    - name: Print concise summary (debug)
      ansible.builtin.debug:
        msg: >-
          RAID={{ raid_md_device }},
          token=[{{ raid_token }}],
          degraded={{ raid_degraded }},
          syncing={{ raid_syncing }},
          allow_sync={{ raid_allow_sync }}
      when: (DEBUG | int) == 1