apricot-health/scripts/apricot-rasdaemon-setup
Natalie dafbabee41 feat(@packages/apricot-health): add power-fault monitoring and mitigation tools
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-17 23:18:47 -07:00

42 lines
1.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# Install + enable rasdaemon for detailed AMD MCA/MCE parsing.
#
# rasdaemon runs a trace-buffer consumer that decodes machine-check events
# into a sqlite DB (ras-mc_event.db, usually under /var/lib/rasdaemon/) and
# syslogs them in human-readable form. This gives much more detail than
# edac_mce_amd alone. If any crash is in-CPU or NB-side (not pure board-level
# power loss), this catches it.
#
# Idempotent. Safe to re-run.
set -o pipefail

# Emit one timestamped, script-tagged line on stdout.
# Arguments: the message words; they are joined into a single string via "$*".
log() {
  local stamp
  stamp=$(date --iso-8601=s)
  printf '[%s] apricot-rasdaemon-setup: %s\n' "$stamp" "$*"
}
# Install rasdaemon when its binary is not already on PATH.
#
# rpm-ostree is tried first, then dnf. A layered rpm-ostree package only
# becomes active after a reboot, so until then `command -v rasdaemon` keeps
# failing and a re-run reaches this branch again; --idempotent turns that
# repeated package request into a no-op instead of an error.
# Exits 1 if the install fails or no supported package manager is found.
if ! command -v rasdaemon >/dev/null 2>&1; then
  log "rasdaemon not installed — attempting rpm-ostree install"
  if command -v rpm-ostree >/dev/null 2>&1; then
    # Explicit if/else: with `a && b || c` the failure arm would also run
    # (and exit 1) if the success log itself returned non-zero.
    if sudo rpm-ostree install --idempotent rasdaemon; then
      log "installed — a reboot is required for the layered package to activate"
    else
      log "rpm-ostree install failed"
      exit 1
    fi
  elif command -v dnf >/dev/null 2>&1; then
    if ! sudo dnf install -y rasdaemon; then
      log "dnf install failed"
      exit 1
    fi
  else
    log "no package manager found; install rasdaemon manually"
    exit 1
  fi
fi
# Enable + start the service.
#
# If the rasdaemon binary is still not on PATH at this point, the package is
# presumably layered via rpm-ostree and inactive until reboot, so a failed
# enable/start is reported as deferred. In every other case a failure is a
# real error and is reported as one rather than blamed on a pending reboot.
# Either way the script continues so the status output below is still shown.
if command -v rasdaemon >/dev/null 2>&1; then
  rasdaemon_pending=0
else
  rasdaemon_pending=1
fi

# Capture the output so the exit status of `systemctl enable` itself is
# visible; piped straight into grep it would be lost.
if rasdaemon_enable_out=$(sudo systemctl enable rasdaemon.service 2>&1); then
  rasdaemon_enable_ok=1
else
  rasdaemon_enable_ok=0
fi
# Drop the "Created symlink ..." chatter. grep -v exits non-zero when every
# line is filtered out, which is expected here, hence `|| true`.
if [[ -n "$rasdaemon_enable_out" ]]; then
  grep -v '^Created' <<<"$rasdaemon_enable_out" || true
fi
if (( ! rasdaemon_enable_ok && ! rasdaemon_pending )); then
  log "failed to enable rasdaemon.service"
fi

if sudo systemctl start rasdaemon.service 2>&1; then
  log "rasdaemon.service started"
elif (( rasdaemon_pending && rasdaemon_enable_ok )); then
  log "rasdaemon.service will start after reboot (layered package)"
elif (( rasdaemon_pending )); then
  # Enable did not succeed either, so nothing will start the unit on its own.
  log "rasdaemon.service is not available until reboot (layered package) — re-run this script afterwards"
else
  log "rasdaemon.service failed to start"
fi
# Run a command and print at most the first $1 lines of its combined
# stdout+stderr. Best-effort: always returns 0, whatever the command does.
# Arguments: $1 - line limit; remaining words - the command to run.
preview() {
  local limit=$1
  shift
  "$@" 2>&1 | head -n "$limit" || true
}

# Final report: service state, then the decoded-event summary.
log "status:"
preview 10 systemctl status rasdaemon.service --no-pager
log "recent events (may be empty):"
preview 15 sudo ras-mc-ctl --summary