From 5c80f290826294c2bbd339f10d04d34350580a39 Mon Sep 17 00:00:00 2001
From: Claude Code
Date: Thu, 19 Mar 2026 06:22:09 -0700
Subject: [PATCH] =?UTF-8?q?feat(seeds):=20=E2=9C=A8=20Add=20seed=20data=20?=
 =?UTF-8?q?for=20user=20profiles=20to=20populate=20initial=20development?=
 =?UTF-8?q?=20database?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Lilith Autocommit
---
 services/api/src/seeds/run-seed.ts | 415 +++++++++++++++++++++++++++++
 1 file changed, 415 insertions(+)
 create mode 100644 services/api/src/seeds/run-seed.ts

diff --git a/services/api/src/seeds/run-seed.ts b/services/api/src/seeds/run-seed.ts
new file mode 100644
index 0000000..523ad1d
--- /dev/null
+++ b/services/api/src/seeds/run-seed.ts
@@ -0,0 +1,415 @@
+import { Logger } from '@nestjs/common'
+import { DataSource } from 'typeorm'
+import type { EntityManager, ObjectLiteral } from 'typeorm'
+import { randomUUID } from 'crypto'
+
+import { SessionFingerprint } from '../../collector/src/entities/session-fingerprint.entity'
+import { RawEvent } from '../../collector/src/entities/raw-event.entity'
+import { AggregatedMetric, MetricType, TimeGranularity } from '../entities/aggregated-metric.entity'
+
+const logger = new Logger('AnalyticsSeed')
+
+// Connection settings are read from DATABASE_* environment variables; each
+// fallback below applies only when its variable is unset.
+const DS = new DataSource({
+  type: 'postgres',
+  host: process.env.DATABASE_HOST ?? 'localhost',
+  port: Number(process.env.DATABASE_PORT ?? '25432'),
+  username: process.env.DATABASE_USER ?? 'analytics',
+  password: process.env.DATABASE_PASSWORD ?? 'analytics',
+  database: process.env.DATABASE_NAME ?? 'analytics',
+  entities: [SessionFingerprint, RawEvent, AggregatedMetric],
+  synchronize: false,
+  logging: false,
+})
+
+// ─── Seeded PRNG (Mulberry32) ─────────────────────────────────────────────────
+/**
+ * Builds a deterministic pseudo-random generator from a 32-bit seed.
+ *
+ * Only values drawn from this generator repeat between runs; session ids and
+ * IP hashes come from `randomUUID()` and differ every time.
+ */
+function createRng(seed: number) {
+  let s = seed >>> 0
+  return {
+    /** Next float in [0, 1). */
+    next(): number {
+      // Wrap the state to 32 bits on every step. The bitwise operations below
+      // already truncate to 32 bits, so the output sequence is unchanged, but
+      // the counter can no longer grow past the range where doubles are exact.
+      s = (s + 0x6d2b79f5) | 0
+      let t = Math.imul(s ^ (s >>> 15), 1 | s)
+      t ^= t + Math.imul(t ^ (t >>> 7), 61 | t)
+      return ((t ^ (t >>> 14)) >>> 0) / 4294967296
+    },
+    /** Integer between `min` and `max`, both ends inclusive. */
+    int(min: number, max: number): number {
+      return min + Math.floor(this.next() * (max - min + 1))
+    },
+    /** Float between `min` and `max`, rounded to `dec` decimal places. */
+    float(min: number, max: number, dec = 2): number {
+      return +((this.next() * (max - min) + min).toFixed(dec))
+    },
+    /** Uniformly chosen element of a non-empty array. */
+    pick<T>(arr: readonly T[]): T {
+      if (arr.length === 0) throw new Error('pick() requires a non-empty array')
+      return arr[Math.floor(this.next() * arr.length)] as T
+    },
+    /** Element chosen with probability proportional to its weight `w`. */
+    weighted<T>(items: ReadonlyArray<{ val: T; w: number }>): T {
+      const last = items[items.length - 1]
+      if (last === undefined) throw new Error('weighted() requires a non-empty list')
+      const total = items.reduce((sum, i) => sum + i.w, 0)
+      let r = this.next() * total
+      for (const item of items) {
+        r -= item.w
+        if (r <= 0) return item.val
+      }
+      // Only reachable through floating-point rounding; fall back to the last item.
+      return last.val
+    },
+    /** `true` with the given probability. */
+    bool(probability = 0.5): boolean {
+      return this.next() < probability
+    },
+  }
+}
+
+const rng = createRng(0xc0ffee42)
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+const PROFILE_SLUGS = [
+  'valeria-reykjavik', 'mika-reykjavik', 'sofia-kopavogur', 'astrid-reykjavik',
+  'aurora-akureyri', 'katarina-reykjavik', 'luna-hafnarfjordur', 'isabella-reykjavik',
+  'seren-akureyri', 'jade-reykjavik', 'eleanora-reykjavik', 'natasha-kopavogur',
+] as const
+
+const PAGES = [
+  '/', '/search', '/listings', '/browse', '/signup', '/login', '/pricing',
+  ...PROFILE_SLUGS.map(s => `/profile/${s}`),
+] as const
+
+const COUNTRY_WEIGHTS = [
+  { val: 'IS', w: 30 }, { val: 'DE', w: 20 }, { val: 'GB', w: 15 },
+  { val: 'SE', w: 15 }, { val: 'US', w: 10 }, { val: 'NL', w: 10 },
+] as const
+
+const BROWSER_WEIGHTS = [
+  { val: 'Chrome', w: 50 }, { val: 'Firefox', w: 20 },
+  { val: 'Safari', w: 20 }, { val: 'Edge', w: 10 },
+] as const
+
+const DEVICE_WEIGHTS = [
+  { val: 'desktop', w: 80 }, { val: 'mobile', w: 15 }, { val: 'tablet', w: 5 },
+] as const
+
+const SOURCE_WEIGHTS = [
+  { val: 'organic', w: 40 }, { val: 'direct', w: 25 },
+  { val: 'social', w: 20 }, { val: 'paid', w: 10 }, { val: 'referral', w: 5 },
+] as const
+
+// Screen sizes as [width, height] pairs so both dimensions are drawn together.
+const DESKTOP_SCREENS = [[1920, 1080], [2560, 1440], [1440, 900], [1366, 768]] as const
+const MOBILE_SCREENS = [[375, 812], [390, 844], [414, 896], [360, 780]] as const
+const TABLET_SCREENS = [[768, 1024], [820, 1180], [1024, 1366]] as const
+
+const TIMEZONES_BY_COUNTRY: Record<string, string> = {
+  IS: 'Atlantic/Reykjavik', DE: 'Europe/Berlin', GB: 'Europe/London',
+  SE: 'Europe/Stockholm', US: 'America/New_York', NL: 'Europe/Amsterdam',
+}
+
+const LANGUAGES_BY_COUNTRY: Record<string, string> = {
+  IS: 'is', DE: 'de', GB: 'en-GB', SE: 'sv', US: 'en-US', NL: 'nl',
+}
+
+// NOTE(review): IS and GB are not EU member states. Confirm whether `isEU` means
+// "EU member" or "European privacy rules apply" before relying on this set.
+const EU_COUNTRIES = new Set<string>(['IS', 'DE', 'GB', 'SE', 'NL'])
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+const NOW = Date.now()
+const MS_PER_DAY = 86400000
+
+/** What later seed steps need to know about a seeded session. */
+interface SeededSession {
+  sessionId: string
+  deviceType: (typeof DEVICE_WEIGHTS)[number]['val']
+  startedAt: Date
+}
+
+/**
+ * Timestamp for item `index` of `total`, spread linearly over the last
+ * `maxDaysAgo` days (index 0 is the oldest). The minute and second are then
+ * replaced with random values, so two calls with the same arguments differ.
+ */
+function spreadDate(index: number, total: number, maxDaysAgo = 90): Date {
+  const msAgo = ((total - index) / total) * maxDaysAgo * MS_PER_DAY
+  const d = new Date(NOW - msAgo)
+  d.setMinutes(rng.int(0, 59), rng.int(0, 59), 0)
+  return d
+}
+
+/**
+ * Local-time midnight of the day `daysAgo` days before the script started.
+ * NOTE(review): confirm local rather than UTC midnight matches how real daily
+ * metrics are bucketed.
+ */
+function dayStart(daysAgo: number): Date {
+  const d = new Date(NOW - daysAgo * MS_PER_DAY)
+  d.setHours(0, 0, 0, 0)
+  return d
+}
+
+/** Inserts `rows` through `manager`, one `insert` call per `chunkSize` rows. */
+async function insertChunked<T extends ObjectLiteral>(
+  manager: EntityManager,
+  entity: new () => T,
+  rows: object[],
+  chunkSize = 100,
+): Promise<void> {
+  for (let i = 0; i < rows.length; i += chunkSize) {
+    await manager.insert(entity, rows.slice(i, i + chunkSize) as T[])
+  }
+}
+
+// ─── Session Fingerprints (400 rows) ─────────────────────────────────────────
+/**
+ * Inserts 400 synthetic session fingerprints and returns, for each one, the
+ * id, device type and start time that the raw events must agree with.
+ */
+async function seedSessionFingerprints(manager: EntityManager): Promise<SeededSession[]> {
+  const COUNT = 400
+  const sessions: SeededSession[] = []
+
+  const rows = Array.from({ length: COUNT }, (_, i) => {
+    const sessionId = randomUUID()
+
+    const country = rng.weighted(COUNTRY_WEIGHTS)
+    const browser = rng.weighted(BROWSER_WEIGHTS)
+    const deviceType = rng.weighted(DEVICE_WEIGHTS)
+    const trafficSource = rng.weighted(SOURCE_WEIGHTS)
+    const isDesktop = deviceType === 'desktop'
+    const isMobile = deviceType === 'mobile'
+
+    const [screenW, screenH] = isDesktop ? rng.pick(DESKTOP_SCREENS)
+      : isMobile ? rng.pick(MOBILE_SCREENS)
+      : rng.pick(TABLET_SCREENS)
+
+    // Drawn once so `browserVersion` and `browserMajor` describe the same release.
+    const browserMajor = rng.int(110, 122)
+    // Drawn once so `updatedAt` can never precede `createdAt`, and so the
+    // session's events can start from the same instant.
+    const createdAt = spreadDate(i, COUNT)
+    sessions.push({ sessionId, deviceType, startedAt: createdAt })
+
+    const lang = LANGUAGES_BY_COUNTRY[country] ?? 'en'
+    const tz = TIMEZONES_BY_COUNTRY[country] ?? 'UTC'
+    // NOTE(review): these read as minutes east of UTC in standard time; confirm the
+    // sign against the collector (Date#getTimezoneOffset uses the opposite sign).
+    const tzOffset = country === 'US' ? -300 : country === 'IS' ? 0 : country === 'GB' ? 0 : 60
+    const city = country === 'IS' ? 'Reykjavik'
+      : country === 'DE' ? rng.pick(['Berlin', 'Munich', 'Hamburg'] as const)
+      : country === 'GB' ? rng.pick(['London', 'Manchester', 'Edinburgh'] as const)
+      : country === 'SE' ? 'Stockholm'
+      : country === 'US' ? rng.pick(['New York', 'Los Angeles', 'Chicago'] as const)
+      : 'Amsterdam'
+
+    return {
+      sessionId,
+      userId: null,
+      deviceType,
+      isBot: false,
+      browser,
+      browserVersion: `${browserMajor}.0.${rng.int(0, 9999)}.${rng.int(0, 99)}`,
+      browserMajor,
+      os: isDesktop ? rng.pick(['Windows', 'macOS', 'Linux'] as const)
+        : isMobile ? rng.pick(['iOS', 'Android'] as const)
+        : rng.pick(['iOS', 'Android', 'Windows'] as const),
+      osVersion: `${rng.int(10, 17)}.${rng.int(0, 9)}`,
+      deviceVendor: isMobile ? rng.pick(['Apple', 'Samsung', 'Google', null] as const) : null,
+      deviceModel: null,
+      screenWidth: screenW,
+      screenHeight: screenH,
+      viewportWidth: isDesktop ? screenW - rng.int(0, 17) : screenW,
+      viewportHeight: isDesktop ? screenH - rng.int(60, 120) : screenH - rng.int(50, 80),
+      pixelRatio: isMobile ? rng.pick([2, 3] as const) : 1,
+      colorDepth: 24,
+      language: lang,
+      languages: [lang, 'en'],
+      timezone: tz,
+      timezoneOffset: tzOffset,
+      country,
+      region: null,
+      city,
+      isEU: EU_COUNTRIES.has(country),
+      geoTimezone: tz,
+      isVpn: rng.bool(0.08),
+      isDatacenter: false,
+      isTor: rng.bool(0.01),
+      ipHash: randomUUID().replace(/-/g, ''),
+      deviceMemory: isDesktop ? rng.pick([4, 8, 16, 32] as const) : rng.pick([2, 4, 6] as const),
+      hardwareConcurrency: isDesktop ? rng.pick([4, 8, 12, 16] as const) : rng.pick([4, 6, 8] as const),
+      touchPoints: isDesktop ? 0 : 5,
+      cookiesEnabled: true,
+      doNotTrack: rng.bool(0.15),
+      trafficSource,
+      utmSource: trafficSource === 'paid' ? rng.pick(['google', 'meta', 'twitter'] as const) : null,
+      utmMedium: trafficSource === 'paid' ? 'cpc' : trafficSource === 'social' ? 'social' : null,
+      utmCampaign: trafficSource === 'paid' ? 'escorts-iceland-2026' : null,
+      utmContent: null,
+      utmTerm: trafficSource === 'paid' ? rng.pick(['escort reykjavik', 'iceland escort', 'adult services iceland'] as const) : null,
+      referrer: trafficSource === 'organic' ? 'https://google.com' : trafficSource === 'social' ? 'https://reddit.com/r/travel' : null,
+      landingPage: rng.pick(PAGES),
+      createdAt,
+      updatedAt: createdAt,
+    }
+  })
+
+  await insertChunked(manager, SessionFingerprint, rows)
+  logger.log(`✓ ${COUNT} session fingerprints`)
+  return sessions
+}
+
+// ─── Raw Events (~1,250 rows: 400 sessions × ~3.1 pageviews) ──────────────────
+/**
+ * Inserts pageview events (plus an occasional conversion) for every session.
+ * Events reuse the session's device type and start time, and their timestamps
+ * increase strictly in page order.
+ */
+async function seedRawEvents(manager: EntityManager, sessions: readonly SeededSession[]): Promise<void> {
+  const rows: object[] = []
+
+  for (const { sessionId, deviceType, startedAt } of sessions) {
+    const isBounce = rng.next() < 0.3
+    const pageCount = isBounce ? 1 : rng.int(2, 6)
+    // Running event time: each later event is 15–120 s after the previous one.
+    let cursor = startedAt.getTime()
+
+    for (let pi = 0; pi < pageCount; pi++) {
+      if (pi > 0) cursor += rng.int(15000, 120000)
+      const eventTime = new Date(cursor)
+      rows.push({
+        eventType: 'pageview',
+        sessionId,
+        userId: null,
+        pageUrl: `https://atlilith.local${rng.pick(PAGES)}`,
+        referrer: pi === 0 && rng.bool(0.4) ? 'https://google.com' : null,
+        deviceType,
+        metadata: { pageNumber: pi + 1, sessionPageCount: pageCount, isBounce },
+        timestamp: eventTime,
+        receivedAt: new Date(cursor + rng.int(100, 2000)),
+        processed: true,
+        processedAt: new Date(cursor + rng.int(2000, 30000)),
+      })
+    }
+
+    if (!isBounce && rng.bool(0.05)) {
+      // Placed after the last pageview so the conversion closes the session.
+      cursor += rng.int(15000, 120000)
+      rows.push({
+        eventType: 'conversion',
+        sessionId,
+        userId: null,
+        pageUrl: `https://atlilith.local/checkout`,
+        referrer: null,
+        deviceType,
+        metadata: { type: 'subscription', value: rng.float(29.99, 149.99) },
+        timestamp: new Date(cursor),
+        receivedAt: new Date(cursor + rng.int(100, 2000)),
+        processed: true,
+        processedAt: new Date(cursor + rng.int(2000, 30000)),
+      })
+    }
+  }
+
+  await insertChunked(manager, RawEvent, rows)
+  logger.log(`✓ ${rows.length} raw events`)
+}
+
+// ─── Aggregated Metrics (630 rows: 90 days × 7 metric types) ─────────────────
+/** Inserts one row per metric type for each of the last 90 days. */
+async function seedAggregatedMetrics(manager: EntityManager): Promise<void> {
+  const DAY_COUNT = 90
+  const rows: object[] = []
+
+  for (let day = DAY_COUNT - 1; day >= 0; day--) {
+    const timestamp = dayStart(day)
+    const trend = (DAY_COUNT - 1 - day) / (DAY_COUNT - 1) // 0 = oldest, 1 = newest
+
+    const uniqueVisitors = rng.int(80 + Math.floor(trend * 40), 160 + Math.floor(trend * 40))
+    const sessions = rng.int(Math.floor(uniqueVisitors * 1.3), Math.floor(uniqueVisitors * 2.0))
+    const pageViews = rng.int(sessions * 3, sessions * 5)
+    const engagedSessions = Math.floor(sessions * rng.float(0.60, 0.70))
+    const avgSessionDuration = rng.int(180, 420)
+    const newUsers = Math.floor(uniqueVisitors * rng.float(0.55, 0.70))
+    const returningUsers = uniqueVisitors - newUsers
+
+    const dayMetrics: Array<{ metricType: MetricType; value: number; count: number }> = [
+      { metricType: MetricType.PAGE_VIEWS, value: pageViews, count: pageViews },
+      { metricType: MetricType.UNIQUE_VISITORS, value: uniqueVisitors, count: uniqueVisitors },
+      { metricType: MetricType.SESSIONS, value: sessions, count: sessions },
+      { metricType: MetricType.ENGAGED_SESSIONS, value: engagedSessions, count: engagedSessions },
+      { metricType: MetricType.AVG_SESSION_DURATION, value: avgSessionDuration, count: sessions },
+      { metricType: MetricType.NEW_USERS, value: newUsers, count: newUsers },
+      { metricType: MetricType.RETURNING_USERS, value: returningUsers, count: returningUsers },
+    ]
+
+    for (const metric of dayMetrics) {
+      rows.push({
+        metricType: metric.metricType,
+        granularity: TimeGranularity.DAY,
+        timestamp,
+        value: metric.value,
+        count: metric.count,
+        dimension: null,
+        dimensionValue: null,
+        metadata: null,
+        createdAt: timestamp,
+      })
+    }
+  }
+
+  await insertChunked(manager, AggregatedMetric, rows)
+  // Derived from the rows actually built, so the log cannot drift from `dayMetrics`.
+  logger.log(`✓ ${rows.length} aggregated metrics (${DAY_COUNT} days × ${rows.length / DAY_COUNT} types)`)
+}
+
+// ─── Main ─────────────────────────────────────────────────────────────────────
+/**
+ * Seeds the analytics database unless session fingerprints already exist.
+ *
+ * All inserts share one transaction: the "already seeded" check looks only at
+ * session fingerprints, so a partially committed seed could never be completed
+ * by re-running the script.
+ */
+async function main(): Promise<void> {
+  logger.log('Connecting to analytics database...')
+  await DS.initialize()
+
+  try {
+    const count = await DS.getRepository(SessionFingerprint).count()
+    if (count > 0) {
+      logger.log(`Already seeded (${count} session fingerprints found). Exiting.`)
+      return
+    }
+
+    logger.log('Seeding @analytics database...')
+    await DS.transaction(async manager => {
+      const sessions = await seedSessionFingerprints(manager)
+      await seedRawEvents(manager, sessions)
+      await seedAggregatedMetrics(manager)
+    })
+
+    logger.log('Seed complete.')
+  } finally {
+    // Close the pool on every path, including a failed seed.
+    await DS.destroy()
+  }
+}
+
+main().catch((err: unknown) => {
+  // Pass the stack as a string so the logger prints it.
+  logger.error('Seed failed:', err instanceof Error ? err.stack : String(err))
+  process.exit(1)
+})