analytics/scripts/deploy.sh

260 lines
12 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# =============================================================================
# @analytics — Deploy to vps-0
# =============================================================================
# Build images on a chosen BUILD_HOST, ship via docker save | ssh | docker load,
# then `docker compose up -d --no-build` on the VPS.
#
# Why not build on the VPS: vps-0 has 4 GB RAM. `docker compose --build` there
# OOM-kills nginx (incident 2026-05-15). Build elsewhere, ship the images.
#
# Build host (apricot, the old x86 builder, is decommissioned) — BUILD_HOST env:
#   black (default) → LAN amd64 host, builds NATIVELY (fast); context rsync'd over,
#                     images streamed black → VPS via this host.
#   local           → this host (plum, arm64); cross-builds amd64 under emulation
#                     (DOCKER_DEFAULT_PLATFORM=linux/amd64). Slower fallback.
#   quinn-vps       → last resort: builds on the 4 GB target itself (OOM risk).
# All paths target linux/amd64 — a native arm64 image crashes on the VPS with
# "exec format error". Override the arch via TARGET_PLATFORM= if the VPS changes.
#
# Strategy (step numbers match the [n/6] progress lines the script prints):
#   1. bun run build:services (TS → dist, locally)
#   2. .vendor-lilith/ staging (registry @lilith/* deps, baked into the image)
#   3. docker compose build (on BUILD_HOST → infrastructure-<svc>:latest)
#   4. docker save | zstd | ssh (stream images to the VPS, decompress, load)
#   5. rsync compose + init.sql (in case schema/compose changed), then
#      docker compose up -d (VPS — --no-build, or --build for build-on-target)
#   6. Smoke health endpoints
#
# Usage: ./scripts/deploy.sh [svc1 svc2 ...]
# BUILD_HOST=local ./scripts/deploy.sh # emulated build on this host
# BUILD_HOST=quinn-vps ./scripts/deploy.sh # last-resort build-on-target
# No args: deploy all build-using services.
# With args: deploy only the named services (faster iteration).
# =============================================================================
set -euo pipefail

# Where this script lives, and the repository root one level above it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Deploy target. REMOTE_DIR deliberately keeps a literal "~": it is only used
# inside remote command strings and rsync destinations, never as a local path.
REMOTE="quinn-vps"
REMOTE_DIR="~/analytics"
COMPOSE_REL="infrastructure/docker-compose.prod.yaml"
PROJECT="infrastructure" # docker compose project name (= dir name)

# ── Build host ──────────────────────────────────────────────────────────────
# vps-0 is amd64 while the local dev host (plum) is arm64, so every build path
# targets linux/amd64 (a native arm64 image fails on the VPS with
# "exec format error"). apricot, the old x86 builder, is decommissioned.
# Preference order:
#   1. black   — LAN amd64 host, builds NATIVELY (fast). DEFAULT. BUILD_HOST=black
#   2. local   — this host, cross-builds amd64 under emulation. BUILD_HOST=local
#   3. the VPS — last resort: builds on the 4 GB target (OOM risk). BUILD_HOST=quinn-vps
: "${BUILD_HOST:=black}"
: "${TARGET_PLATFORM:=linux/amd64}"
DOCKER_DEFAULT_PLATFORM="$TARGET_PLATFORM"
export DOCKER_DEFAULT_PLATFORM
REMOTE_BUILD_DIR="~/analytics-build"

# Placeholder values handed to `compose build` so variable interpolation does
# not warn about settings that only matter at runtime. The string is
# word-split on purpose where it is used.
BUILD_VARS="POSTGRES_USER=build POSTGRES_PASSWORD=build POSTGRES_DB=build REDIS_PASSWORD=build CORS_ORIGINS=build COLLECTOR_WRITE_KEY=build API_KEYS=build ADMIN_URL=http://build"

# Classify BUILD_HOST into one of three build modes:
#   local  — "local" or this machine's own (short or full) hostname
#   target — the deploy target itself, under any of its aliases
#   remote — anything else; treated as an ssh-reachable build host
if [[ "$BUILD_HOST" == "local" || "$BUILD_HOST" == "$(hostname -s)" || "$BUILD_HOST" == "$(hostname)" ]]; then
  BUILD_MODE=local
elif [[ "$BUILD_HOST" == "$REMOTE" || "$BUILD_HOST" == "vps-0" || "$BUILD_HOST" == "vps0" ]]; then
  BUILD_MODE=target
else
  BUILD_MODE=remote
fi
# Preflight: fail fast (before any build work) if the chosen build host has no
# reachable Docker daemon. Build-on-target gets a warning and a 5 s grace
# period instead of a probe.
if [[ "$BUILD_MODE" == "local" ]]; then
  docker info >/dev/null 2>&1 || {
    echo "ERROR: Docker daemon not reachable on $(hostname -s) (BUILD_HOST=local)." >&2
    echo " Start Docker Desktop, or use the default BUILD_HOST=black (native amd64)." >&2
    exit 1
  }
elif [[ "$BUILD_MODE" == "remote" ]]; then
  # Probe over ssh with a short connect timeout; an ssh failure and a Docker
  # failure on the far side are reported the same way.
  ssh -o ConnectTimeout=8 -o ControlPath=none "$BUILD_HOST" 'docker info >/dev/null 2>&1' || {
    echo "ERROR: Docker not reachable on build host '${BUILD_HOST}'." >&2
    echo " Fall back with BUILD_HOST=local (emulated amd64) if ${BUILD_HOST} is down." >&2
    exit 1
  }
elif [[ "$BUILD_MODE" == "target" ]]; then
  echo "WARN: BUILD_HOST=${BUILD_HOST} builds on the VPS itself — 4 GB RAM, OOM-killed nginx 2026-05-15." >&2
  echo " Documented last resort. Ctrl-C to abort; continuing in 5s..." >&2
  sleep 5
fi
# Services this script knows how to build and deploy.
ALL_SERVICES=(collector processor api website-bff realtime)

#######################################
# Report whether a name is one of the deployable services.
# The comparison is an exact, literal string match: the argument is never
# interpreted as a regex or glob, so inputs like "a.i" or ".*" are rejected.
# Globals:   ALL_SERVICES (read)
# Arguments: $1 - candidate service name
# Returns:   0 if the name is listed in ALL_SERVICES, 1 otherwise
#######################################
is_known_service() {
  local wanted="${1-}"
  local known
  for known in "${ALL_SERVICES[@]}"; do
    if [[ "$wanted" == "$known" ]]; then
      return 0
    fi
  done
  return 1
}

# Positional args narrow the deploy to the named services; no args = all.
if [[ $# -gt 0 ]]; then
  SERVICES=("$@")
else
  SERVICES=("${ALL_SERVICES[@]}")
fi

# Validate requested services before any build work starts. These names are
# later spliced into paths and remote command lines, so only exact known names
# may pass.
for svc in "${SERVICES[@]}"; do
  if ! is_known_service "$svc"; then
    echo "ERROR: unknown service '${svc}'. Valid: ${ALL_SERVICES[*]}" >&2
    exit 1
  fi
done
echo "==> Deploying services: ${SERVICES[*]}"
# ---------------------------------------------------------------------------
# [1/6] Compile TS → dist for each service
# ---------------------------------------------------------------------------
printf '%s\n' "==> [1/6] bun run build:services..."
# Run the package script from the repository root; the working directory stays
# there for the steps that follow.
cd "$ROOT_DIR"
bun run build:services
# ---------------------------------------------------------------------------
# [2/6] Stage @lilith registry deps into each service's .vendor-lilith/
# ---------------------------------------------------------------------------
# For every selected service, rebuild services/<svc>/.vendor-lilith/ from
# scratch and copy in each non-workspace @lilith/* dependency named in the
# service's package.json, plus (recursively) the @lilith/* dependencies of
# those packages.
#
# The service directory, vendor directory and service name reach the inline
# Node program through environment variables rather than being spliced into
# the JS source, so a checkout path containing a quote or backslash cannot
# break or alter the program.
echo "==> [2/6] Staging @lilith registry deps for Docker COPY..."
for svc in "${SERVICES[@]}"; do
  # The trailing slash matters: the JS appends 'package.json' directly.
  svc_dir="${ROOT_DIR}/services/${svc}/"
  vendor_dir="${svc_dir}.vendor-lilith"
  rm -rf "$vendor_dir"
  mkdir -p "$vendor_dir"
  STAGE_SVC_DIR="$svc_dir" STAGE_VENDOR_DIR="$vendor_dir" STAGE_SVC_NAME="$svc" node -e "
    const fs = require('fs');
    const path = require('path');
    const svcDir = process.env.STAGE_SVC_DIR;
    const vendorDir = process.env.STAGE_VENDOR_DIR;
    const svcName = process.env.STAGE_SVC_NAME;

    // Copy one package (and, recursively, its @lilith/* deps) into vendorDir.
    function stagePackage(name) {
      const dst = path.join(vendorDir, ...name.split('/'));
      if (fs.existsSync(dst)) return; // already staged; also stops dependency cycles
      const parts = name.split('/');
      const bunKey = parts.join('+'); // store dirs are named <scope>+<pkg>@<version>
      let real = null;
      // Walk up from the service dir looking for the package in a plain
      // node_modules tree first, then in a node_modules/.bun store.
      let search = path.resolve(svcDir);
      while (search !== '/') {
        const candidate = path.join(search, 'node_modules', ...parts);
        if (fs.existsSync(candidate)) { real = fs.realpathSync(candidate); break; }
        const bunDir = path.join(search, 'node_modules', '.bun');
        if (fs.existsSync(bunDir)) {
          const match = fs.readdirSync(bunDir).find(d => d.startsWith(bunKey + '@'));
          if (match) {
            const storePkg = path.join(bunDir, match, 'node_modules', ...parts);
            if (fs.existsSync(storePkg)) { real = fs.realpathSync(storePkg); break; }
          }
        }
        search = path.dirname(search);
      }
      // Best effort: a missing package is reported but does not abort staging.
      if (!real) { console.warn(' WARN: ' + name + ' not found from ' + svcName); return; }
      fs.mkdirSync(path.dirname(dst), { recursive: true });
      fs.cpSync(real, dst, { recursive: true });
      const child = JSON.parse(fs.readFileSync(path.join(real, 'package.json'), 'utf8'));
      for (const [dep] of Object.entries(child.dependencies || {})) {
        if (dep.startsWith('@lilith/')) stagePackage(dep);
      }
    }

    // Entry point: stage every @lilith/* dependency that is not a workspace: link.
    const p = JSON.parse(fs.readFileSync(svcDir + 'package.json', 'utf8'));
    for (const [name, ver] of Object.entries(p.dependencies || {})) {
      if (name.startsWith('@lilith/') && typeof ver === 'string' && !ver.startsWith('workspace:')) {
        stagePackage(name);
      }
    }
  "
done
# ---------------------------------------------------------------------------
# [3/6] Build images + [4/6] ship to the VPS (path depends on BUILD_MODE)
# ---------------------------------------------------------------------------
#######################################
# Copy the Docker build context to a destination tree.
# Sends the compose file and init.sql to <dest>/infrastructure/, then for each
# selected service sends only what the rsync filter below lets through
# (dist/, .vendor-lilith/, Dockerfile, package.json) — never node_modules or
# sources. The destination directories must already exist.
# Globals:   ROOT_DIR (read), SERVICES (read)
# Arguments: $1 - destination "host:dir"
# Returns:   non-zero if the first rsync fails (under `set -e` any failing
#            rsync aborts the script)
#######################################
sync_context() { # $1 = destination "host:dir"
  local dest="$1"
  # Keep the loop variable local so the caller's global $svc is not clobbered.
  local svc
  rsync -az "$ROOT_DIR/infrastructure/docker-compose.prod.yaml" "$ROOT_DIR/infrastructure/init.sql" \
    "${dest}/infrastructure/"
  for svc in "${SERVICES[@]}"; do
    # The includes whitelist the context; the final --exclude='*' drops the
    # rest. --delete removes stale files inside the included trees.
    rsync -az --delete \
      --include='dist/***' --include='.vendor-lilith/***' \
      --include='Dockerfile' --include='package.json' --exclude='*' \
      "$ROOT_DIR/services/${svc}/" "${dest}/services/${svc}/"
  done
}
# Build the images and get them onto the VPS. The path depends on BUILD_MODE:
#   remote — rsync the context to BUILD_HOST, build there, then relay each
#            image BUILD_HOST → this host → VPS as a zstd-compressed stream.
#   local  — build on this host, then stream each image straight to the VPS.
#   target — only ship the build context; the VPS builds during bring-up.
if [[ "$BUILD_MODE" == "remote" ]]; then
  echo "==> [3/6] Building on ${BUILD_HOST} (native ${TARGET_PLATFORM})..."
  # Pre-create the directories sync_context writes into on the build host.
  ssh -o ControlPath=none "$BUILD_HOST" "mkdir -p ${REMOTE_BUILD_DIR}/infrastructure $(printf "${REMOTE_BUILD_DIR}/services/%s " "${SERVICES[@]}")"
  sync_context "${BUILD_HOST}:${REMOTE_BUILD_DIR}"
  # shellcheck disable=SC2086 # BUILD_VARS / SERVICES intentionally word-split into the remote command
  ssh -o ControlPath=none "$BUILD_HOST" \
    "cd ${REMOTE_BUILD_DIR} && env ${BUILD_VARS} docker compose -f ${COMPOSE_REL} -p ${PROJECT} build ${SERVICES[*]}"
  echo "==> [4/6] Streaming images ${BUILD_HOST} → ${REMOTE} (via $(hostname -s))..."
  for svc in "${SERVICES[@]}"; do
    image="${PROJECT}-${svc}:latest"
    echo " -> ${image}"
    # Compress on the build host, decompress on the VPS; with pipefail a
    # failure on either side aborts the deploy.
    ssh -o ControlPath=none "$BUILD_HOST" "docker save ${image} | zstd -T0 -q" \
      | ssh -o ControlPath=none "$REMOTE" "zstd -d -q | docker load"
  done
elif [[ "$BUILD_MODE" == "local" ]]; then
  echo "==> [3/6] Building locally ($(uname -m) → ${TARGET_PLATFORM}; emulated if arm64)..."
  cd "$ROOT_DIR"
  # shellcheck disable=SC2086 # BUILD_VARS intentionally word-split
  env ${BUILD_VARS} docker compose -f "$COMPOSE_REL" -p "$PROJECT" build "${SERVICES[@]}"
  echo "==> [4/6] Shipping images to ${REMOTE}..."
  for svc in "${SERVICES[@]}"; do
    image="${PROJECT}-${svc}:latest"
    # The size is purely informational. numfmt is a GNU coreutils tool and may
    # be absent on this host, and the inspect can fail; either would otherwise
    # abort the deploy under `set -e` + pipefail. Fall back to "?" instead.
    size="$(docker image inspect "$image" --format '{{.Size}}' 2>/dev/null | numfmt --to=iec 2>/dev/null)" || size=""
    echo " -> ${image} (${size:-?})"
    docker save "$image" | zstd -T0 -q | ssh -o ControlPath=none "$REMOTE" "zstd -d -q | docker load"
  done
else # target — last resort: ship context, image builds on the VPS in [5]
  echo "==> [3/6] Shipping build context to ${REMOTE} (build-on-target)..."
  # Pre-create the directories sync_context writes into on the VPS.
  ssh -o ControlPath=none "$REMOTE" "mkdir -p ${REMOTE_DIR}/infrastructure $(printf "${REMOTE_DIR}/services/%s " "${SERVICES[@]}")"
  sync_context "${REMOTE}:${REMOTE_DIR}"
  echo "==> [4/6] (skipped — images build on the target during bring-up)"
fi
# ---------------------------------------------------------------------------
# [5/6] Sync compose + init.sql; bring up stack
# local/remote builds → images already loaded on the VPS → --no-build
# target (last resort) → no pre-loaded images → --build on the VPS
# ---------------------------------------------------------------------------
echo "==> [5/6] Syncing compose config + bringing up stack..."
# Always push the current compose file and init.sql, whichever build path ran.
rsync -avz \
  "$ROOT_DIR/infrastructure/docker-compose.prod.yaml" \
  "$ROOT_DIR/infrastructure/init.sql" \
  "$REMOTE:$REMOTE_DIR/infrastructure/"

# Default to using the pre-loaded images; only build-on-target must build.
BUILD_FLAG="--no-build"
if [[ "$BUILD_MODE" == "target" ]]; then
  BUILD_FLAG="--build"
fi

# Assemble the remote command locally, then run it on the VPS in one ssh call.
remote_up_cmd="cd $REMOTE_DIR && docker compose -f infrastructure/docker-compose.prod.yaml --env-file infrastructure/.env.prod -p $PROJECT up -d ${BUILD_FLAG} --remove-orphans"
ssh -o ControlPath=none "$REMOTE" "$remote_up_cmd"
# ---------------------------------------------------------------------------
# [6/6] Health smoke
# ---------------------------------------------------------------------------
# After a fixed 10 s settle, curl each deployed service's /health endpoint from
# the VPS itself. Services with no entry in PORTS are reported and skipped.
# Leaves fail=1 if any probed service did not answer successfully.
echo "==> [6/6] Health smoke (10s settle)..."
sleep 10
declare -A PORTS=(
  [collector]=4001
  [api]=4003
  [website-bff]=4005
)
fail=0
for svc in "${SERVICES[@]}"; do
  port="${PORTS[$svc]:-}"
  if [[ -n "$port" ]]; then
    # -f turns HTTP error statuses into a failing exit code; --max-time caps
    # each probe at 5 s.
    if ! ssh -o ControlPath=none "$REMOTE" "curl -sf --max-time 5 http://localhost:${port}/health >/dev/null"; then
      echo " ${svc} (:${port}): NOT READY"
      fail=1
    else
      echo " ${svc} (:${port}): OK"
    fi
  else
    echo " ${svc}: (no health endpoint to check)"
  fi
done
# Final report: exit non-zero with a log hint if any health probe failed;
# otherwise print the deploy timestamp (UTC) and the public endpoints.
echo ""
if [[ $fail -ne 0 ]]; then
  echo "WARN: one or more services did not respond healthy. Check: ssh $REMOTE 'docker compose -p $PROJECT logs --tail=50'"
  exit 1
fi
echo "Deployed at $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
echo " Collector: https://data.transquinnftw.com/analytics/track/"
echo " API: https://data.transquinnftw.com/api/"
echo " Rollup: https://data.cocotte.maison/ (basic-auth)"