---
# check_raid.yml
# Forked from jakub/ansible.

# Play: read /proc/mdstat from a VM over password-based SSH (executed on the
# proxmox_nextcloud host) and fail the job when the selected md array is
# missing, degraded, or - optionally - rebuilding or running a check.
#
# Inputs (environment variables, read via the env lookup):
#   VM_IP, VM_USER, VM_PASS  SSH target and credentials (required)
#   DEBUG                    1 = print parsed values and disable no_log
#   RAID_DEVICE              md device name, with or without /dev/ (default md0)
#   FAIL_ON_RESYNC           1 = fail while resync/recovery/reshape/repair runs
#   FAIL_ON_CHECK            1 = fail while a "check" pass runs
- name: Check mdadm RAID status on VM (via SSH from proxmox_nextcloud)
  hosts: proxmox_nextcloud
  gather_facts: false
  become: true
  become_user: root
  become_method: sudo

  vars:
    # VM connection (provided by Semaphore env vars)
    vm_ip: "{{ lookup('env', 'VM_IP') }}"
    vm_user: "{{ lookup('env', 'VM_USER') }}"
    vm_pass: "{{ lookup('env', 'VM_PASS') }}"
    # NOTE(review): not referenced by any task in this play.
    use_sudo: false

    # The env lookup yields an empty string for an unset variable, so every
    # default() below passes `true` as second argument to also replace
    # empty values (otherwise the default would never apply).

    # Debug / behavior toggles (controlled via Semaphore variables)
    DEBUG: "{{ lookup('env', 'DEBUG') | default(0, true) | int }}"

    # Which md device to check (md0, md1, ... or /dev/md0)
    RAID_DEVICE: "{{ lookup('env', 'RAID_DEVICE') | default('md0', true) }}"

    # Fail the job if rebuild/resync is in progress (recommended)
    FAIL_ON_RESYNC: "{{ lookup('env', 'FAIL_ON_RESYNC') | default(1, true) | int }}"

    # Fail the job if a periodic "check" is running (usually set 0 to avoid noise)
    FAIL_ON_CHECK: "{{ lookup('env', 'FAIL_ON_CHECK') | default(0, true) | int }}"

  tasks:
    # Without these three values the SSH call below can only fail, and with
    # no_log active that failure would be hard to diagnose.
    - name: Fail early if VM connection settings are missing
      ansible.builtin.assert:
        that:
          - vm_ip | string | length > 0
          - vm_user | string | length > 0
          - vm_pass | string | length > 0
        fail_msg: "VM_IP, VM_USER and VM_PASS must all be set in the environment."
        quiet: true

    - name: Ensure sshpass is installed (for password-based SSH)
      ansible.builtin.apt:
        name: sshpass
        state: present
        update_cache: true
        cache_valid_time: 86400

    # The password is handed to sshpass through the SSHPASS environment
    # variable (`sshpass -e`), so it never appears in argv.
    # NOTE(review): StrictHostKeyChecking=no together with
    # UserKnownHostsFile=/dev/null disables host key verification; combined
    # with password authentication this is open to man-in-the-middle
    # attacks. Consider pinning the VM host key.
    - name: Read /proc/mdstat from VM (via SSH)
      ansible.builtin.command:
        argv:
          - timeout
          - 25s
          - sshpass
          - -e
          - ssh
          - -o
          - StrictHostKeyChecking=no
          - -o
          - UserKnownHostsFile=/dev/null
          - -o
          - ConnectTimeout=10
          - -o
          - ConnectionAttempts=1
          - -o
          - NumberOfPasswordPrompts=1
          - -o
          - PubkeyAuthentication=no
          - -o
          - GSSAPIAuthentication=no
          - -o
          - PasswordAuthentication=yes
          - -o
          - ServerAliveInterval=5
          - -o
          - ServerAliveCountMax=2
          - "{{ vm_user }}@{{ vm_ip }}"
          - "cat /proc/mdstat"
      environment:
        SSHPASS: "{{ vm_pass }}"
      register: mdstat_cmd
      changed_when: false
      # Never fail here; the next task reports the failure with a readable
      # message built from rc and stderr.
      failed_when: false
      # `| int` keeps the comparison valid even when the templated DEBUG
      # value arrives as a string.
      no_log: "{{ DEBUG | int == 0 }}"

    - name: Fail if we cannot read /proc/mdstat (SSH/auth/network)
      ansible.builtin.assert:
        that:
          - mdstat_cmd.rc == 0
        fail_msg: >-
          Cannot read /proc/mdstat from {{ vm_ip }} (rc={{ mdstat_cmd.rc }}).
          stderr={{ (mdstat_cmd.stderr | default('') | trim) }}
        success_msg: "Successfully read /proc/mdstat from {{ vm_ip }}."
      changed_when: false

    - name: Build base variables
      ansible.builtin.set_fact:
        # Accept both "md0" and "/dev/md0".
        raid_md: "{{ RAID_DEVICE | regex_replace('^/dev/', '') }}"
        mdstat_text: "{{ mdstat_cmd.stdout | default('') }}"
      changed_when: false

    # Regexes live in block scalars on purpose: there YAML leaves backslashes
    # alone, and Jinja2 turns '\\b' into the regex token \b. Inside a YAML
    # double-quoted string the same text would reach the regex engine as a
    # literal backspace character.
    - name: Extract selected md block from mdstat
      ansible.builtin.set_fact:
        # Everything from the "<md> :" header up to the next "mdN :" header,
        # the "unused devices:" trailer, or end of text. The device name is
        # escaped so it is always matched literally.
        raid_block: >-
          {{
            mdstat_text
            | regex_findall(
                '(?ms)^' ~ (raid_md | regex_escape)
                ~ '\\s*:.*?(?=^md\\d+\\s*:|^unused devices:|\\Z)'
              )
            | first
            | default('', true)
          }}
      changed_when: false

    - name: Parse RAID status from mdstat (safe parsing)
      vars:
        # Member map such as "UU" or "U_", taken from "[n/m] [UU]".
        # Defined as a task var because keys of one set_fact task cannot
        # read each other: all arguments are templated before the task runs.
        _raid_member_map: >-
          {{
            raid_block
            | regex_findall('\\[[0-9]+/[0-9]+\\]\\s*\\[([U_]+)\\]')
            | first
            | default('', true)
          }}
      ansible.builtin.set_fact:
        raid_present: "{{ (raid_block | length) > 0 }}"
        raid_status: "{{ _raid_member_map }}"
        # An underscore in the member map marks a missing member; an empty
        # map (no status line found) is treated as not degraded.
        raid_is_degraded: "{{ '_' in _raid_member_map }}"
        raid_is_rebuilding: >-
          {{ raid_block is search('(?i)\\b(resync|recovery|reshape|repair)\\b') }}
        raid_is_checking: >-
          {{ raid_block is search('(?i)\\bcheck\\b') }}
        # First bracketed line that mentions an ongoing action (empty if none).
        raid_action_line: >-
          {{
            raid_block
            | regex_findall('(?im)^(\\s*\\[[^\\]]+\\].*\\b(?:resync|recovery|reshape|repair|check)\\b.*)$')
            | first
            | default('', true)
          }}
        # Percentage reported after "<action> =" (empty if none).
        raid_progress: >-
          {{
            raid_block
            | regex_findall('(?i)\\b(?:resync|recovery|reshape|repair|check)\\b\\s*=\\s*([0-9.]+)%')
            | first
            | default('', true)
          }}
      changed_when: false

    - name: Debug | Show mdstat and parsed values
      ansible.builtin.debug:
        msg: |
          --- /proc/mdstat ---
          {{ mdstat_text }}
          --- md block ({{ raid_md }}) ---
          {{ raid_block }}
          --- Parsed ---
          raid_present={{ raid_present }}
          raid_status={{ raid_status }}
          raid_is_degraded={{ raid_is_degraded }}
          raid_is_rebuilding={{ raid_is_rebuilding }}
          raid_is_checking={{ raid_is_checking }}
          raid_progress={{ raid_progress }}
          raid_action_line={{ raid_action_line }}
      when: DEBUG | int == 1

    - name: Fail if RAID device is not present
      ansible.builtin.assert:
        that:
          - raid_present
        fail_msg: "RAID {{ raid_md }} not found in /proc/mdstat on VM {{ vm_ip }}."
        success_msg: "RAID {{ raid_md }} found in /proc/mdstat."
      changed_when: false

    - name: Fail if RAID is degraded (missing member)
      ansible.builtin.assert:
        that:
          - not raid_is_degraded
        fail_msg: >-
          RAID {{ raid_md }} is DEGRADED: status={{ raid_status }}.
          mdstat excerpt={{ (raid_block | trim) }}
        success_msg: "RAID {{ raid_md }} is OK: status={{ raid_status }}."
      changed_when: false

    - name: Fail if RAID rebuild/resync is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_rebuilding
        # `| string` guards against the percentage being handed back as a
        # number, which has no length.
        fail_msg: >-
          RAID {{ raid_md }} is rebuilding/resyncing.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | string | length) > 0 else '' }}
          line={{ (raid_action_line | default('n/a', true)) }}
        success_msg: "RAID {{ raid_md }} is not rebuilding/resyncing."
      when: FAIL_ON_RESYNC | int == 1
      changed_when: false

    - name: Fail if RAID check is in progress (optional)
      ansible.builtin.assert:
        that:
          - not raid_is_checking
        # `| string` guards against the percentage being handed back as a
        # number, which has no length.
        fail_msg: >-
          RAID {{ raid_md }} is running a periodic check.
          {{ 'progress=' ~ raid_progress ~ '%; ' if (raid_progress | string | length) > 0 else '' }}
          line={{ (raid_action_line | default('n/a', true)) }}
        success_msg: "RAID {{ raid_md }} is not running a periodic check."
      when: FAIL_ON_CHECK | int == 1
      changed_when: false