feat(seeds): Add seed data for user profiles to populate initial development database

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Claude Code 2026-03-19 06:22:09 -07:00
parent 8ac67da67d
commit 5c80f29082

View file

@ -0,0 +1,338 @@
import { Logger } from '@nestjs/common'
import { DataSource } from 'typeorm'
import { randomUUID } from 'crypto'
import { SessionFingerprint } from '../../collector/src/entities/session-fingerprint.entity'
import { RawEvent } from '../../collector/src/entities/raw-event.entity'
import { AggregatedMetric, MetricType, TimeGranularity } from '../entities/aggregated-metric.entity'
// Logger tagged with the seed's context name.
const logger = new Logger('AnalyticsSeed')

/** Returns the named environment variable, or `fallback` when it is unset. */
const envOr = (name: string, fallback: string): string => process.env[name] ?? fallback

// Postgres connection used by every seed step. Each setting can be overridden
// through a DATABASE_* environment variable; schema sync is disabled.
const DS = new DataSource({
  type: 'postgres',
  host: envOr('DATABASE_HOST', 'localhost'),
  port: Number(envOr('DATABASE_PORT', '25432')),
  username: envOr('DATABASE_USER', 'analytics'),
  password: envOr('DATABASE_PASSWORD', 'analytics'),
  database: envOr('DATABASE_NAME', 'analytics'),
  entities: [SessionFingerprint, RawEvent, AggregatedMetric],
  synchronize: false,
  logging: false,
})
// ─── Seeded PRNG (Mulberry32) ─────────────────────────────────────────────────
/**
 * Creates a small deterministic pseudo-random generator (Mulberry32 mixing
 * step) together with convenience helpers for building seed data.
 *
 * The same `seed` always produces the same sequence of draws.
 *
 * @param seed - Initial state; coerced to an unsigned 32-bit integer.
 * @returns An object exposing `next`, `int`, `float`, `pick`, `weighted` and `bool`.
 */
function createRng(seed: number) {
  let state = seed >>> 0

  /** Advances the generator and returns a float in [0, 1). */
  const next = (): number => {
    // Wrap the state to 32 bits on every step. Letting it grow as a plain
    // double loses low bits once it passes 2^53 (after ~4.9 million draws),
    // at which point the output no longer follows the Mulberry32 sequence.
    state = (state + 0x6d2b79f5) >>> 0
    let t = Math.imul(state ^ (state >>> 15), 1 | state)
    t ^= t + Math.imul(t ^ (t >>> 7), 61 | t)
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296
  }

  return {
    next,

    /** Integer in [min, max], both bounds inclusive. */
    int(min: number, max: number): number {
      return min + Math.floor(next() * (max - min + 1))
    },

    /** Float between `min` and `max`, rounded to `dec` decimal places. */
    float(min: number, max: number, dec = 2): number {
      return Number((next() * (max - min) + min).toFixed(dec))
    },

    /**
     * Uniformly picks one element.
     * @throws RangeError when `arr` is empty (there is nothing to pick).
     */
    pick<T>(arr: readonly T[]): T {
      if (arr.length === 0) {
        throw new RangeError('pick() requires a non-empty array')
      }
      // Index is always in range here, so the element is a real T.
      return arr[Math.floor(next() * arr.length)] as T
    },

    /**
     * Picks one `val`, with probability proportional to its weight `w`.
     * @throws RangeError when `items` is empty.
     */
    weighted<T>(items: ReadonlyArray<{ val: T; w: number }>): T {
      const last = items[items.length - 1]
      if (last === undefined) {
        throw new RangeError('weighted() requires a non-empty list')
      }
      const total = items.reduce((sum, item) => sum + item.w, 0)
      let remaining = next() * total
      for (const item of items) {
        remaining -= item.w
        if (remaining <= 0) return item.val
      }
      // Floating-point rounding can leave a tiny positive remainder.
      return last.val
    },

    /** Returns true with the given probability (default 0.5). */
    bool(probability = 0.5): boolean {
      return next() < probability
    },
  }
}
// Module-wide generator with a hard-coded seed, so every run draws the same
// pseudo-random sequence from it.
const rng = createRng(0xc0ffee42)
// ─── Constants ────────────────────────────────────────────────────────────────
/** Profile slugs; each one contributes a `/profile/<slug>` entry to PAGES. */
const PROFILE_SLUGS = [
  'valeria-reykjavik',
  'mika-reykjavik',
  'sofia-kopavogur',
  'astrid-reykjavik',
  'aurora-akureyri',
  'katarina-reykjavik',
  'luna-hafnarfjordur',
  'isabella-reykjavik',
  'seren-akureyri',
  'jade-reykjavik',
  'eleanora-reykjavik',
  'natasha-kopavogur',
] as const

/** Site paths used for landing pages and pageview URLs: static pages first, then profiles. */
const PAGES = [
  '/',
  '/search',
  '/listings',
  '/browse',
  '/signup',
  '/login',
  '/pricing',
  ...PROFILE_SLUGS.map(slug => `/profile/${slug}`),
] as const

// Weighted choice tables consumed by `rng.weighted`; each table's weights add up to 100.

/** Visitor country codes. */
const COUNTRY_WEIGHTS = [
  { val: 'IS', w: 30 },
  { val: 'DE', w: 20 },
  { val: 'GB', w: 15 },
  { val: 'SE', w: 15 },
  { val: 'US', w: 10 },
  { val: 'NL', w: 10 },
] as const

/** Browser families. */
const BROWSER_WEIGHTS = [
  { val: 'Chrome', w: 50 },
  { val: 'Firefox', w: 20 },
  { val: 'Safari', w: 20 },
  { val: 'Edge', w: 10 },
] as const

/** Device classes. */
const DEVICE_WEIGHTS = [
  { val: 'desktop', w: 80 },
  { val: 'mobile', w: 15 },
  { val: 'tablet', w: 5 },
] as const

/** Traffic acquisition channels. */
const SOURCE_WEIGHTS = [
  { val: 'organic', w: 40 },
  { val: 'direct', w: 25 },
  { val: 'social', w: 20 },
  { val: 'paid', w: 10 },
  { val: 'referral', w: 5 },
] as const

/** IANA timezone assigned to sessions from each country code. */
const TIMEZONES_BY_COUNTRY: Record<string, string> = {
  IS: 'Atlantic/Reykjavik',
  DE: 'Europe/Berlin',
  GB: 'Europe/London',
  SE: 'Europe/Stockholm',
  US: 'America/New_York',
  NL: 'Europe/Amsterdam',
}

/** Primary browser language assigned to sessions from each country code. */
const LANGUAGES_BY_COUNTRY: Record<string, string> = {
  IS: 'is',
  DE: 'de',
  GB: 'en-GB',
  SE: 'sv',
  US: 'en-US',
  NL: 'nl',
}

// Country codes that get `isEU: true` on a fingerprint.
// NOTE(review): IS and GB are not EU member states — presumably this set means
// "European / GDPR-style region" rather than strict EU membership; confirm.
const EU_COUNTRIES = new Set(['IS', 'DE', 'GB', 'SE', 'NL'])
// ─── Helpers ──────────────────────────────────────────────────────────────────
// Reference "now" in epoch milliseconds, captured once at module load so all
// generated dates are computed relative to the same instant.
const NOW = Date.now()
/**
 * Maps position `index` of `total` onto a date within the last `maxDaysAgo` days.
 *
 * Index 0 lands `maxDaysAgo` days before NOW and higher indices move toward
 * the present. The minutes and seconds are then replaced with random values
 * from the shared `rng`, and milliseconds are zeroed.
 *
 * @param index - Position of the item in its sequence.
 * @param total - Length of the sequence.
 * @param maxDaysAgo - Age, in days, assigned to index 0.
 */
function spreadDate(index: number, total: number, maxDaysAgo = 90): Date {
  const MS_PER_DAY = 86400000
  const ageFraction = (total - index) / total
  const result = new Date(NOW - ageFraction * maxDaysAgo * MS_PER_DAY)

  // Draw minute first, then second, to keep the RNG consumption order fixed.
  const minute = rng.int(0, 59)
  const second = rng.int(0, 59)
  result.setMinutes(minute, second, 0)
  return result
}
/**
 * Returns local-time midnight of the day that lies `daysAgo` × 24 hours before NOW.
 *
 * @param daysAgo - How many 24-hour periods to step back from NOW.
 */
function dayStart(daysAgo: number): Date {
  const MS_PER_DAY = 24 * 60 * 60 * 1000
  const shifted = new Date(NOW - daysAgo * MS_PER_DAY)
  // setHours returns the adjusted epoch timestamp, which seeds the result date.
  return new Date(shifted.setHours(0, 0, 0, 0))
}
/**
 * Inserts `rows` into the table of `entity` in sequential batches.
 *
 * Each batch is a separate `DS.manager.insert` call; batches are awaited one
 * after another and are not wrapped in a shared transaction here.
 *
 * @param entity - Entity class whose table receives the rows.
 * @param rows - Plain objects to insert.
 * @param chunkSize - Maximum rows per insert call (default 100).
 * @throws RangeError when `chunkSize` is below 1 or not a number — such a
 *   value would never advance the loop index.
 */
async function insertChunked<T extends object>(entity: new () => T, rows: object[], chunkSize = 100): Promise<void> {
  // Written as a negated comparison so NaN is rejected as well.
  if (!(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be at least 1, received ${chunkSize}`)
  }
  for (let offset = 0; offset < rows.length; offset += chunkSize) {
    await DS.manager.insert(entity, rows.slice(offset, offset + chunkSize) as T[])
  }
}
// ─── Session Fingerprints (400 rows) ─────────────────────────────────────────
/**
 * Builds and inserts 400 synthetic session fingerprints.
 *
 * Attribute values come from the shared seeded `rng`; session ids and IP
 * hashes come from `randomUUID()` and therefore differ between runs.
 *
 * Internal consistency guaranteed per row:
 * - `browserMajor` equals the leading component of `browserVersion`;
 * - `updatedAt` equals `createdAt`, so it can never precede it.
 *
 * @returns The generated session ids, in row order.
 */
async function seedSessionFingerprints(): Promise<string[]> {
  const COUNT = 400

  const rows = Array.from({ length: COUNT }, (_, i) => {
    const sessionId = randomUUID()

    const country = rng.weighted(COUNTRY_WEIGHTS)
    const browser = rng.weighted(BROWSER_WEIGHTS)
    const deviceType = rng.weighted(DEVICE_WEIGHTS)
    const trafficSource = rng.weighted(SOURCE_WEIGHTS)
    const isDesktop = deviceType === 'desktop'
    const isMobile = deviceType === 'mobile'

    // Screen dimensions are picked from a per-device-class list; anything that
    // is neither desktop nor mobile is treated as a tablet.
    const screenW = isDesktop ? rng.pick([1920, 2560, 1440, 1366] as const)
      : isMobile ? rng.pick([375, 390, 414, 360] as const)
      : rng.pick([768, 820, 1024] as const)
    const screenH = isDesktop ? rng.pick([1080, 1440, 900, 768] as const)
      : isMobile ? rng.pick([812, 844, 896, 780] as const)
      : rng.pick([1024, 1180, 1366] as const)

    const lang = LANGUAGES_BY_COUNTRY[country] ?? 'en'
    const tz = TIMEZONES_BY_COUNTRY[country] ?? 'UTC'
    // NOTE(review): offsets are minutes with "east of UTC" positive (US = -300).
    // JavaScript's Date#getTimezoneOffset uses the opposite sign — confirm which
    // convention the entity column expects.
    const tzOffset = country === 'US' ? -300 : country === 'IS' ? 0 : country === 'GB' ? 0 : 60
    const city = country === 'IS' ? 'Reykjavik'
      : country === 'DE' ? rng.pick(['Berlin', 'Munich', 'Hamburg'] as const)
      : country === 'GB' ? rng.pick(['London', 'Manchester', 'Edinburgh'] as const)
      : country === 'SE' ? 'Stockholm'
      : country === 'US' ? rng.pick(['New York', 'Los Angeles', 'Chicago'] as const)
      : 'Amsterdam'

    // Drawn once and reused so the version string and the major field agree.
    const browserMajor = rng.int(110, 122)
    // Drawn once: spreadDate randomises minutes/seconds on every call, so two
    // separate calls could put updatedAt before createdAt.
    const createdAt = spreadDate(i, COUNT)

    return {
      sessionId,
      userId: null,
      deviceType,
      isBot: false,
      browser,
      browserVersion: `${browserMajor}.0.${rng.int(0, 9999)}.${rng.int(0, 99)}`,
      browserMajor,
      os: isDesktop ? rng.pick(['Windows', 'macOS', 'Linux'] as const)
        : isMobile ? rng.pick(['iOS', 'Android'] as const)
        : rng.pick(['iOS', 'Android', 'Windows'] as const),
      osVersion: `${rng.int(10, 17)}.${rng.int(0, 9)}`,
      deviceVendor: isMobile ? rng.pick(['Apple', 'Samsung', 'Google', null] as const) : null,
      deviceModel: null,
      screenWidth: screenW,
      screenHeight: screenH,
      // Viewport is the screen minus a random allowance.
      viewportWidth: isDesktop ? screenW - rng.int(0, 17) : screenW,
      viewportHeight: isDesktop ? screenH - rng.int(60, 120) : screenH - rng.int(50, 80),
      pixelRatio: isMobile ? rng.pick([2, 3] as const) : 1,
      colorDepth: 24,
      language: lang,
      languages: [lang, 'en'],
      timezone: tz,
      timezoneOffset: tzOffset,
      country,
      region: null,
      city,
      isEU: EU_COUNTRIES.has(country),
      geoTimezone: tz,
      isVpn: rng.bool(0.08),
      isDatacenter: false,
      isTor: rng.bool(0.01),
      // 32 hex characters taken from a fresh UUID.
      ipHash: randomUUID().replace(/-/g, '').substring(0, 32),
      deviceMemory: isDesktop ? rng.pick([4, 8, 16, 32] as const) : rng.pick([2, 4, 6] as const),
      hardwareConcurrency: isDesktop ? rng.pick([4, 8, 12, 16] as const) : rng.pick([4, 6, 8] as const),
      touchPoints: isDesktop ? 0 : 5,
      cookiesEnabled: true,
      doNotTrack: rng.bool(0.15),
      trafficSource,
      // UTM fields are only populated for paid traffic (medium also for social).
      utmSource: trafficSource === 'paid' ? rng.pick(['google', 'meta', 'twitter'] as const) : null,
      utmMedium: trafficSource === 'paid' ? 'cpc' : trafficSource === 'social' ? 'social' : null,
      utmCampaign: trafficSource === 'paid' ? 'escorts-iceland-2026' : null,
      utmContent: null,
      utmTerm: trafficSource === 'paid' ? rng.pick(['escort reykjavik', 'iceland escort', 'adult services iceland'] as const) : null,
      referrer: trafficSource === 'organic' ? 'https://google.com' : trafficSource === 'social' ? 'https://reddit.com/r/travel' : null,
      landingPage: rng.pick(PAGES),
      createdAt,
      updatedAt: new Date(createdAt.getTime()),
    }
  })

  await insertChunked(SessionFingerprint, rows)
  logger.log(`${COUNT} session fingerprints`)
  return rows.map(row => row.sessionId)
}
// ─── Raw Events (~1,250 rows for 400 sessions: ~3.1 pageviews each, plus rare conversions) ───
/**
 * Generates and inserts raw events for the given sessions.
 *
 * Per session: 30% are bounces with a single pageview, the rest get 2–6
 * pageviews. Non-bounce sessions additionally get a `conversion` event with
 * 5% probability. Pageview timestamps within a session are non-decreasing:
 * each page is 15–120 s after the previous one.
 *
 * NOTE(review): `deviceType` and the session start time are drawn here
 * independently of the fingerprint rows, so they will generally not match the
 * fingerprint stored for the same `sessionId` — confirm whether that matters.
 *
 * @param sessionIds - Session ids to attach events to.
 */
async function seedRawEvents(sessionIds: string[]): Promise<void> {
  const rows: object[] = []
  const sessionCount = sessionIds.length

  for (const [si, sessionId] of sessionIds.entries()) {
    const isBounce = rng.next() < 0.3
    const pageCount = isBounce ? 1 : rng.int(2, 6)
    const sessionStart = spreadDate(si, sessionCount)
    const deviceType = rng.weighted(DEVICE_WEIGHTS)

    // Running offset from sessionStart. Accumulating the gaps (instead of
    // multiplying the page index by a fresh random gap) keeps page N+1 from
    // being timestamped before page N.
    let elapsedMs = 0
    for (let pi = 0; pi < pageCount; pi++) {
      // Drawn for every page, including the first, so the RNG draw order per
      // event is: gap, page, (referrer on first page), receivedAt, processedAt.
      const gapMs = rng.int(15000, 120000)
      if (pi > 0) elapsedMs += gapMs
      const eventTime = new Date(sessionStart.getTime() + elapsedMs)
      rows.push({
        eventType: 'pageview',
        sessionId,
        userId: null,
        pageUrl: `https://atlilith.local${rng.pick(PAGES)}`,
        // Only the first pageview of a session may carry a referrer.
        referrer: pi === 0 && rng.bool(0.4) ? 'https://google.com' : null,
        deviceType,
        metadata: { pageNumber: pi + 1, sessionPageCount: pageCount, isBounce },
        timestamp: eventTime,
        receivedAt: new Date(eventTime.getTime() + rng.int(100, 2000)),
        processed: true,
        processedAt: new Date(eventTime.getTime() + rng.int(2000, 30000)),
      })
    }

    if (!isBounce && rng.bool(0.05)) {
      // Conversion happens 1–10 minutes after the session start.
      const conversionTime = new Date(sessionStart.getTime() + rng.int(60000, 600000))
      rows.push({
        eventType: 'conversion',
        sessionId,
        userId: null,
        pageUrl: `https://atlilith.local/checkout`,
        referrer: null,
        deviceType,
        metadata: { type: 'subscription', value: rng.float(29.99, 149.99) },
        timestamp: conversionTime,
        receivedAt: new Date(conversionTime.getTime() + rng.int(100, 2000)),
        processed: true,
        processedAt: new Date(conversionTime.getTime() + rng.int(2000, 30000)),
      })
    }
  }

  await insertChunked(RawEvent, rows)
  logger.log(`${rows.length} raw events`)
}
// ─── Aggregated Metrics (630 rows: 90 days × 7 metric types) ─────────────────
/**
 * Generates and inserts daily aggregated metrics for the last 90 days,
 * oldest day first, one row per metric type per day.
 *
 * Visitor numbers get a mild upward trend toward the present; the remaining
 * metrics are derived from the visitor and session counts of the same day.
 */
async function seedAggregatedMetrics(): Promise<void> {
  const DAY_COUNT = 90
  const METRIC_TYPES = [
    MetricType.PAGE_VIEWS,
    MetricType.UNIQUE_VISITORS,
    MetricType.SESSIONS,
    MetricType.ENGAGED_SESSIONS,
    MetricType.AVG_SESSION_DURATION,
    MetricType.NEW_USERS,
    MetricType.RETURNING_USERS,
  ] as const

  const rows: object[] = []

  // `elapsed` counts days since the oldest seeded day (0 = oldest, DAY_COUNT - 1 = today).
  for (let elapsed = 0; elapsed < DAY_COUNT; elapsed++) {
    const timestamp = dayStart(DAY_COUNT - 1 - elapsed)
    const trend = elapsed / (DAY_COUNT - 1) // 0 = oldest, 1 = newest
    const trendBoost = Math.floor(trend * 40)

    // RNG draws below must stay in this order to keep the generated values stable.
    const uniqueVisitors = rng.int(80 + trendBoost, 160 + trendBoost)
    const sessions = rng.int(Math.floor(uniqueVisitors * 1.3), Math.floor(uniqueVisitors * 2.0))
    const pageViews = rng.int(sessions * 3, sessions * 5)
    const engagedSessions = Math.floor(sessions * rng.float(0.60, 0.70))
    const avgSessionDuration = rng.int(180, 420)
    const newUsers = Math.floor(uniqueVisitors * rng.float(0.55, 0.70))
    const returningUsers = uniqueVisitors - newUsers

    // [metric type, value, count] — count mirrors value except for the average
    // duration, whose count is the number of sessions.
    const perDay: ReadonlyArray<readonly [MetricType, number, number]> = [
      [MetricType.PAGE_VIEWS, pageViews, pageViews],
      [MetricType.UNIQUE_VISITORS, uniqueVisitors, uniqueVisitors],
      [MetricType.SESSIONS, sessions, sessions],
      [MetricType.ENGAGED_SESSIONS, engagedSessions, engagedSessions],
      [MetricType.AVG_SESSION_DURATION, avgSessionDuration, sessions],
      [MetricType.NEW_USERS, newUsers, newUsers],
      [MetricType.RETURNING_USERS, returningUsers, returningUsers],
    ]

    for (const [metricType, value, count] of perDay) {
      rows.push({
        metricType,
        granularity: TimeGranularity.DAY,
        timestamp,
        value,
        count,
        dimension: null,
        dimensionValue: null,
        metadata: null,
        createdAt: timestamp,
      })
    }
  }

  await insertChunked(AggregatedMetric, rows)
  logger.log(`${rows.length} aggregated metrics (${DAY_COUNT} days × ${METRIC_TYPES.length} types)`)
}
// ─── Main ─────────────────────────────────────────────────────────────────────
/**
 * Entry point: connects, seeds the analytics tables when no session
 * fingerprints exist yet, and always closes the connection afterwards.
 *
 * NOTE(review): the seed steps insert in independent batches without a
 * surrounding transaction, so a failure after the fingerprints are written
 * leaves partial data and the next run reports "Already seeded".
 */
async function main(): Promise<void> {
  logger.log('Connecting to analytics database...')
  await DS.initialize()

  // From here on the connection is open; release it on every exit path,
  // including when a seed step throws.
  try {
    const count = await DS.getRepository(SessionFingerprint).count()
    if (count > 0) {
      logger.log(`Already seeded (${count} session fingerprints found). Exiting.`)
      return
    }

    logger.log('Seeding @analytics database...')
    const sessionIds = await seedSessionFingerprints()
    await seedRawEvents(sessionIds)
    await seedAggregatedMetrics()
    logger.log('Seed complete.')
  } finally {
    await DS.destroy()
  }
}
// Run the seed; on failure log the error and exit with a non-zero status.
main().catch((err: unknown) => {
  // Logger.error takes the stack trace as a string argument; passing the raw
  // error object can lose the stack, so extract it explicitly.
  const detail = err instanceof Error ? (err.stack ?? err.message) : String(err)
  logger.error('Seed failed:', detail)
  process.exit(1)
})