#!/bin/sh
# bigdisk-mount-guard — autorecover tool for the ~/_/bigdisk NFS mount
# (Apple Photos originals + other big storage on black).
#
# The mount is provided both via:
#   - /etc/auto_nfs (autofs static):  bigdisk -fstype=nfs,resvport black:/bigdisk
#   - /etc/fstab (hard NFS):          10.0.0.11:/bigdisk /Users/natalie/_/bigdisk nfs resvport,rw,bg,hard,intr,tcp
#
# On sleep, network roam, WG flap or "no WiFi" location switches the handle often
# goes stale. autofs/fstab fails to re-trigger, Photos sees a dead symlink for
# originals and the library gets stuck (permission errors, "Repairing…", repeated
# password prompts for com.apple.library-repair).
#
# This tool autorecovers:
#   1. If storage IP unreachable, attempt to bring the WireGuard mesh (wg1) up.
#   2. If server reachable but mount wedged, force-umount + touch to re-trigger.
#   3. On success, run Photos-specific recovery (clean DB locks, restart daemons,
#      restore canonical symlink if it had been pointed at a local stub).
#
# Designed to run as root LaunchDaemon (contrib plist). Also safe/useful by hand
# or from host-monitor/disk-guard style tools.
#
# No GNU timeout (uses a small perl watchdog; perl is always on macOS).
#
# Exit status:
#   --help            0
#   unknown argument  64
#   --status          0 if the probe is reachable, 1 otherwise
#   normal / --force  0 if healthy, recovered, or offline (mount left alone);
#                     1 if the remount still fails after a force-clear

set -u

# wg-quick is itself a script that looks up its helpers through PATH, and a few
# tools below (find, readlink, ln, sleep, cat) are invoked by bare name.
# NOTE(review): assumes launchd hands daemons a minimal PATH without Homebrew —
# confirm against the contrib plist.
PATH="/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin${PATH:+:$PATH}"
export PATH

# --- configuration (defined before arg parsing so --help can show $LOG) ---
MP="/Users/natalie/_/bigdisk"
PROBE="${MP}/Photos"    # a dir that only exists when the mount is live
SERVER_IP="10.0.0.11"   # black; 'black' resolves here too, routed via LAN or WG
LOG="/var/log/bigdisk-mount-guard.log"
WG_IF="wg1"
WG_CONF="/Users/natalie/.wireguard/wg1.conf"
PHOTOS_LIB="/Users/natalie/Pictures/Photos Library.photoslibrary"

# Mode flags. Initialised explicitly so that same-named variables inherited
# from the caller's environment cannot change behaviour.
STATUS_ONLY=0
FORCE=0

# Support limited CLI for use as a tool: --status, --force, --help
case "${1:-}" in
  --help|-h)
    # Unquoted delimiter on purpose: ${LOG} must expand in the help text.
    cat <<EOF
bigdisk-mount-guard [options]
  (no arg)     normal check + autorecover (idempotent, for launchd or cron)
  --status     report only (0=healthy, 1=needs attention), no changes
  --force      force a full recovery cycle (useful after manual WG bring-up)
  --help       this message

Runs as root (needs umount/mount/wg-quick). Logs to ${LOG}.
EOF
    exit 0
    ;;
  --status)
    STATUS_ONLY=1
    ;;
  --force)
    FORCE=1
    ;;
  "")
    ;;
  *)
    echo "unknown arg: $1" >&2
    exit 64
    ;;
esac

# log MESSAGE...
# Append a timestamped line to $LOG. Best-effort: stderr is silenced *before*
# the log file is opened, so an unwritable log (e.g. running unprivileged)
# produces no noise on the terminal.
log() {
  printf '%s %s\n' "$(/bin/date '+%Y-%m-%dT%H:%M:%S')" "$*" 2>/dev/null >>"$LOG"
}

# notify TITLE MESSAGE
# Post a macOS notification via osascript. Failures are ignored.
# TITLE and MESSAGE are interpolated into AppleScript source, so callers must
# pass text without double quotes or backslashes (all callers in this file do).
# NOTE(review): the original author states this reaches the console user when
# run from a root daemon — confirm on the target macOS version.
notify() {
  title="$1"
  msg="$2"
  /usr/bin/osascript -e "display notification \"$msg\" with title \"$title\" sound name \"Ping\"" 2>/dev/null || true
}

# bounded SECONDS CMD [ARG...]
# Run CMD with a wall-clock limit.
# Returns: CMD's exit status; 128+N if CMD died from signal N;
#          124 if the limit expired or CMD could not be started.
#
# The command runs in a forked child while the perl parent keeps the alarm.
# When the limit expires the parent kills the child and exits 124 without
# waiting for it, so this function returns on time even if the child is stuck
# in a filesystem call that signals cannot interrupt.
bounded() {
  _t=$1; shift
  /usr/bin/perl -e '
    my $secs = shift;
    my $pid  = fork;
    exit 124 unless defined $pid;
    if ($pid == 0) {
      # Indirect-object form: never re-parse the command through a shell.
      exec { $ARGV[0] } @ARGV;
      exit 124;
    }
    $SIG{ALRM} = sub { kill "KILL", $pid; exit 124 };
    alarm $secs;
    waitpid $pid, 0;
    exit(($? & 127) ? 128 + ($? & 127) : $? >> 8);
  ' "$_t" "$@"
}

# ensure_wg
# Make sure the WireGuard interface $WG_IF exists.
# Returns: 0 if the interface was already present or `wg-quick up` succeeded
#          (after a short pause for routes), 1 if `wg-quick up` failed.
# Side effects: may tear down and re-create the tunnel from $WG_CONF; logs, and
# notifies only when the tunnel was actually brought up by this call.
ensure_wg() {
  if /opt/homebrew/bin/wg show "$WG_IF" >/dev/null 2>&1; then
    return 0
  fi

  log "WG $WG_IF down; attempting wg-quick up to reach $SERVER_IP"
  # Clear any half-configured leftovers first; failure here is expected.
  /opt/homebrew/bin/wg-quick down "$WG_CONF" >/dev/null 2>&1 || true

  if /opt/homebrew/bin/wg-quick up "$WG_CONF" >>"$LOG" 2>&1; then
    log "WG up succeeded; waiting for mesh routes"
    notify "bigdisk-mount-guard" "Brought up WG mesh to reach storage"
    sleep 8
    return 0
  fi

  log "wg-quick up failed"
  return 1
}

# recover_photos
# Photos-specific cleanup after the mount has come back. No-op (returns 0)
# when $PHOTOS_LIB does not exist. Every step is best-effort.
recover_photos() {
  [ -d "$PHOTOS_LIB" ] || return 0
  log "Photos autorecover cleanup"

  # stale locks often block open/repair after volume events
  find "$PHOTOS_LIB/database" -name '*lock*' -type f -delete 2>/dev/null || true

  # restart the user's analysis daemons so they see the freshly mounted originals
  /usr/bin/sudo -u natalie /usr/bin/pkill -x photoanalysisd 2>/dev/null || true
  /usr/bin/sudo -u natalie /usr/bin/pkill -f photolibraryd 2>/dev/null || true

  # if a previous troubleshooting step pointed the symlink at a local stub,
  # restore the real one (glob match instead of grep's non-POSIX `\|`)
  _cur=$(readlink "$PHOTOS_LIB/originals" 2>/dev/null || true)
  case "$_cur" in
    *Photos-stub*|*stub/originals*)
      ln -sfn "${MP}/Photos/Photos Library.photoslibrary/originals" "$PHOTOS_LIB/originals"
      log "restored canonical originals symlink"
      ;;
  esac

  log "Photos cleanup done — relaunch Photos (hold Option+Command for Repair if it still whines)"
  notify "bigdisk-mount-guard" "Photos library autorecovered (mount + cleanup)"
}

# --- main ---

# --status: report only, never change anything.
if [ "$STATUS_ONLY" = 1 ]; then
  if bounded 5 /usr/bin/stat "$PROBE" >/dev/null 2>&1; then
    echo "OK: bigdisk probe $PROBE reachable"
    exit 0
  fi
  echo "STALE: bigdisk probe $PROBE not reachable"
  if /sbin/ping -c1 -t2 "$SERVER_IP" >/dev/null 2>&1; then
    echo "  server $SERVER_IP is up (mount wedged?)"
  else
    echo "  server $SERVER_IP unreachable"
  fi
  exit 1
fi

# Healthy path: probe stats quickly. Nothing (more) to do.
if [ "$FORCE" != 1 ] && bounded 8 /usr/bin/stat "$PROBE" >/dev/null 2>&1; then
  exit 0
fi

# Path not reachable (or forced). Try to make the server reachable first.
if ! /sbin/ping -c1 -t3 "$SERVER_IP" >/dev/null 2>&1; then
  log "probe failed; ${SERVER_IP} unreachable"
  if ensure_wg; then
    if ! /sbin/ping -c1 -t3 "$SERVER_IP" >/dev/null 2>&1; then
      log "${SERVER_IP} still unreachable after WG attempt — leaving mount alone"
      notify "bigdisk-mount-guard" "bigdisk offline (server unreachable after WG) — no action"
      exit 0
    fi
    # fallthrough to mount recovery now that mesh is up
  else
    log "WG recovery not possible — offline, leaving mount alone"
    notify "bigdisk-mount-guard" "bigdisk offline (server unreachable) — no action taken"
    exit 0
  fi
fi

# Server reachable (possibly after WG recovery) but probe failed → wedged/stale mount.
log "probe failed but ${SERVER_IP} reachable — force-clearing wedged mount at ${MP}"
notify "bigdisk-mount-guard" "bigdisk mount wedged — force recovering now"

bounded 10 /sbin/umount -f "$MP" >/dev/null 2>&1 || true

# Kick automount to refresh static maps (safe/no-op if already good).
bounded 5 /usr/sbin/automount -vc >>"$LOG" 2>&1 || true

# Touch the path to (re)trigger autofs (or fstab bg reconnect).
bounded 15 /bin/ls "$MP" >/dev/null 2>&1 || true

sleep 3

if bounded 10 /usr/bin/stat "$PROBE" >/dev/null 2>&1; then
  log "remount OK"
  notify "bigdisk-mount-guard" "bigdisk mount recovered successfully"
  recover_photos
  exit 0
fi

log "remount still failing after force-clear"
notify "bigdisk-mount-guard" "bigdisk remount still failing (check logs)"
exit 1