# Original listing metadata: 118 lines, 4.2 KiB, YAML
---
# check_raid.yml
#
# Reads /proc/mdstat from a VM over SSH (wrapped in sshpass) and fails the
# play when the selected Linux MD array is missing, inactive, degraded, or --
# if RAID_ALLOW_SYNC=0 -- running a sync operation.
#
# Inputs, all read through the `env` lookup:
#   VM_IP, VM_USER, VM_PASS  SSH target and password (exported as SSHPASS)
#   RAID_MD                  MD device name, default md0
#   RAID_ALLOW_SYNC          1 (default) tolerates sync operations, 0 fails
#   RETRIES                  SSH retry count, default 10
#   DEBUG                    1 prints the raw mdstat and a summary, and
#                            any non-zero value disables no_log on the SSH task

- name: Check Linux MD RAID health on VM via Proxmox
  hosts: linux_servers
  gather_facts: false
  become: true
  become_user: root
  become_method: sudo

  vars:
    # VM connection (provided by Semaphore env vars)
    vm_ip: "{{ lookup('env', 'VM_IP') }}"
    vm_user: "{{ lookup('env', 'VM_USER') }}"
    vm_pass: "{{ lookup('env', 'VM_PASS') }}"
    use_sudo: false

    # Debug / retries
    # The env lookup returns an empty string (not "undefined") for an unset
    # variable, so default() only applies with its second argument set to true.
    DEBUG: "{{ lookup('env', 'DEBUG') | default(0, true) | int }}"
    RETRIES: "{{ lookup('env', 'RETRIES') | default(10, true) | int }}"

    # RAID device to check (e.g. md0, md1...); a /dev/ prefix is stripped.
    raid_md_device: "{{ lookup('env', 'RAID_MD') | default('md0', true) | basename }}"

    # If 0 => fail when resync/recovery/reshape/check/repair is detected for the device
    # If 1 => allow sync operations (still fails on degraded [U_], [_U], etc.)
    raid_allow_sync: "{{ lookup('env', 'RAID_ALLOW_SYNC') | default(1, true) | int }}"

    # SSH options
    ssh_connect_timeout: 15

  tasks:
    - name: Fail early when VM connection settings are missing
      ansible.builtin.assert:
        that:
          - vm_ip | length > 0
          - vm_user | length > 0
        fail_msg: "VM_IP and VM_USER must be set in the environment."
        quiet: true

    - name: Ensure sshpass is installed (for password-based SSH)
      ansible.builtin.apt:
        name: sshpass
        state: present
        update_cache: true

    - name: Read /proc/mdstat from VM (via SSH)
      ansible.builtin.command:
        argv:
          - sshpass
          - "-e"
          - ssh
          - "-o"
          # NOTE(review): this disables host-key verification; consider
          # accept-new or a managed known_hosts file.
          - StrictHostKeyChecking=no
          - "-o"
          - "ConnectTimeout={{ ssh_connect_timeout }}"
          - "{{ vm_user }}@{{ vm_ip }}"
          # ssh joins all trailing arguments with spaces before handing them
          # to the remote shell, so the remote command must be ONE pre-quoted
          # argument; separate "bash", "-lc", "cat /proc/mdstat" items would
          # run a bare `cat` with /proc/mdstat as $0.
          - "bash -lc '{{ 'sudo ' if (use_sudo | bool) else '' }}cat /proc/mdstat'"
      environment:
        SSHPASS: "{{ vm_pass }}"
      register: mdstat_raw
      changed_when: false
      # Hide the task result unless debugging was requested.
      no_log: "{{ (DEBUG | int) == 0 }}"
      retries: "{{ RETRIES | int }}"
      delay: 2
      until: mdstat_raw.rc == 0

    - name: Debug | Show raw /proc/mdstat
      ansible.builtin.debug:
        msg: "{{ mdstat_raw.stdout }}"
      when: (DEBUG | int) == 1

    - name: Isolate the /proc/mdstat stanza of the selected MD device
      ansible.builtin.set_fact:
        # Header line "<dev> : ..." plus every following line that starts with
        # whitespace; empty string when the device is not listed.
        raid_stanza: >-
          {{
            mdstat_raw.stdout
            | regex_search(
                '^' ~ (raid_md_device | regex_escape) ~ '\\s*:.*(?:\\n[ \\t]+.*)*',
                multiline=True
              )
            | default('', true)
          }}

    - name: Extract RAID status token for selected MD device
      ansible.builtin.set_fact:
        # With a back-reference argument regex_search returns a LIST of groups
        # (and nothing on no match), so unwrap it to a plain string such as
        # "UU" or "U_"; otherwise `'_' in raid_token` is list membership.
        raid_token: >-
          {{
            raid_stanza
            | regex_search(
                '^' ~ (raid_md_device | regex_escape)
                ~ '\\s*:\\s*active.*\\n\\s*\\d+\\s+blocks.*\\[[0-9]+/[0-9]+\\]\\s*\\[([U_]+)\\]',
                '\\1',
                multiline=True
              )
            | default([], true)
            | first
            | default('', true)
          }}

    - name: Detect sync operations in mdstat (resync/recovery/reshape/check/repair)
      ansible.builtin.set_fact:
        # Only the selected device's stanza is searched, so a sync running on
        # a different array is not attributed to this one.
        raid_syncing: "{{ (raid_stanza is search('resync|recovery|reshape|check|repair')) | bool }}"

    - name: Compute degraded flag (underscore means missing member)
      ansible.builtin.set_fact:
        raid_degraded: "{{ ('_' in raid_token) | bool }}"

    - name: Fail if MD device not found in /proc/mdstat
      ansible.builtin.assert:
        that:
          - raid_stanza | length > 0
        fail_msg: "RAID device {{ raid_md_device }} was not found in /proc/mdstat on VM ({{ vm_ip }})."
        success_msg: "RAID device {{ raid_md_device }} found in /proc/mdstat."
      changed_when: false

    - name: Fail if MD device reports no active status token
      ansible.builtin.assert:
        that:
          - raid_token | length > 0
        fail_msg: >-
          RAID device {{ raid_md_device }} is listed in /proc/mdstat on VM
          ({{ vm_ip }}) but reports no active [U_] status (inactive array?).
        success_msg: "RAID device {{ raid_md_device }} is active."
      changed_when: false

    - name: Fail if RAID is degraded (token contains '_')
      ansible.builtin.assert:
        that:
          - not raid_degraded
        fail_msg: "RAID {{ raid_md_device }} is DEGRADED: token=[{{ raid_token }}] (expected all 'U')."
        success_msg: "RAID {{ raid_md_device }} is OK: token=[{{ raid_token }}]."
      changed_when: false

    - name: Fail if RAID is syncing and syncing is not allowed
      ansible.builtin.assert:
        that:
          - (raid_allow_sync | int) == 1 or (not raid_syncing)
        fail_msg: "RAID {{ raid_md_device }} is running a sync operation (resync/recovery/reshape/check/repair) and RAID_ALLOW_SYNC=0."
        success_msg: "No sync operation detected (or RAID_ALLOW_SYNC=1)."
      changed_when: false

    - name: Print concise summary (debug)
      ansible.builtin.debug:
        msg: >-
          RAID={{ raid_md_device }},
          token=[{{ raid_token }}],
          degraded={{ raid_degraded }},
          syncing={{ raid_syncing }},
          allow_sync={{ raid_allow_sync }}
      when: (DEBUG | int) == 1