229 lines
6.2 KiB
TypeScript
229 lines
6.2 KiB
TypeScript
import { Injectable } from '@nestjs/common';
|
|
import { InjectDataSource } from '@nestjs/typeorm';
|
|
import { DataSource } from 'typeorm';
|
|
|
|
/**
 * One corp with its traffic totals for the requested look-back window,
 * as produced by `NetworkService.getCorps`.
 */
export interface CorpRow {
  /** Primary key of the corp (`corps.id`). */
  corpId: number;
  /** Short identifier of the corp (`corps.slug`). */
  slug: string;
  /** Registered name of the corp (`corps.legal_name`). */
  legalName: string;
  /** Number of distinct `visitor_id_daily` values seen for this corp in the window. */
  visitors: number;
  /** Number of `raw_events` rows recorded for this corp in the window. */
  pageviews: number;
}
|
|
|
|
/**
 * One domain with its owning corp and its traffic totals for the requested
 * look-back window, as produced by `NetworkService.getDomains`.
 */
export interface DomainRow {
  /** Primary key of the domain (`domains.id`). */
  domainId: number;
  /** Id of the corp that owns the domain (`domains.corp_id`). */
  corpId: number;
  /** Slug of the owning corp (`corps.slug`). */
  corpSlug: string;
  /** Hostname of the domain (`domains.hostname`). */
  hostname: string;
  /** Value of `domains.role` for this domain. */
  role: string;
  /** Number of distinct `visitor_id_daily` values seen on this domain in the window. */
  visitors: number;
  /** Number of `raw_events` rows recorded for this domain in the window. */
  pageviews: number;
}
|
|
|
|
/**
 * One directed edge of the corp-to-corp flow (Sankey) graph, as produced by
 * `NetworkService.getFlow`.
 */
export interface FlowEdge {
  /** Id of the corp the visitor was on immediately before the transition. */
  fromCorpId: number;
  /** Slug of the origin corp. */
  fromCorpSlug: string;
  /** Id of the corp the visitor moved to. */
  toCorpId: number;
  /** Slug of the destination corp. */
  toCorpSlug: string;
  /**
   * Number of distinct `visitor_id_daily` values that made this transition at
   * least once in the window (not the number of transitions).
   */
  visitors: number;
}
|
|
|
|
/**
 * One cell of the corp-overlap matrix, as produced by
 * `NetworkService.getOverlap`. Only cells with `corpAId < corpBId` are
 * emitted, so each unordered pair of corps appears at most once.
 */
export interface OverlapCell {
  /** Id of the first corp of the pair (the smaller id). */
  corpAId: number;
  /** Slug of the first corp. */
  corpASlug: string;
  /** Id of the second corp of the pair (the larger id). */
  corpBId: number;
  /** Slug of the second corp. */
  corpBSlug: string;
  /** Number of distinct `visitor_id_daily` values seen on both corps in the window. */
  visitors: number;
}
|
|
|
|
/**
 * Cross-corp, cross-domain analytics built on the visitor_id_daily +
 * corp_id + domain_id dimensions stamped onto every raw_events row by the
 * collector's TrackingService.
 *
 * Every method takes a look-back window in days. The value is bound as the
 * single query parameter `$1` and turned into a Postgres interval with
 * `($1 || ' days')::interval`; only events whose "timestamp" is strictly
 * newer than `NOW()` minus that interval are considered.
 *
 * Count columns are declared as strings in the raw row types and converted
 * with `Number(...)` before being returned (presumably because the driver
 * returns bigint counts as strings — confirm against the driver config).
 */
@Injectable()
export class NetworkService {
  constructor(
    @InjectDataSource()
    private readonly dataSource: DataSource,
  ) {}

  /**
   * Per-corp totals over the last N days.
   * `visitors` = distinct visitor_id_daily within the window
   * (collapses to distinct visitors per UTC day, multi-day visitors counted once per day).
   *
   * @param days Size of the look-back window, in days.
   * @returns One row per corp, ordered by visitors (descending) and then slug.
   *   Corps without events in the window are included with zero counts.
   */
  async getCorps(days: number): Promise<CorpRow[]> {
    const rows = await this.dataSource.query<Array<{
      corp_id: number;
      slug: string;
      legal_name: string;
      visitors: string;
      pageviews: string;
    }>>(
      // The time filter lives in the ON clause (not in WHERE) so that the
      // LEFT JOIN still yields a row for corps with no events in the window.
      `
      SELECT
        c.id AS corp_id,
        c.slug AS slug,
        c.legal_name AS legal_name,
        COUNT(DISTINCT e.visitor_id_daily) AS visitors,
        COUNT(e.id) AS pageviews
      FROM corps c
      LEFT JOIN raw_events e
        ON e.corp_id = c.id
        AND e."timestamp" > NOW() - ($1 || ' days')::interval
      GROUP BY c.id, c.slug, c.legal_name
      ORDER BY visitors DESC NULLS LAST, c.slug ASC
      `,
      [days],
    );

    return rows.map((r) => ({
      corpId: Number(r.corp_id),
      slug: r.slug,
      legalName: r.legal_name,
      visitors: Number(r.visitors ?? 0),
      pageviews: Number(r.pageviews ?? 0),
    }));
  }

  /**
   * Per-domain table grouped by corp.
   *
   * @param days Size of the look-back window, in days.
   * @returns One row per domain (joined to its owning corp), ordered by
   *   pageviews (descending) and then hostname. Domains without events in the
   *   window are included with zero counts.
   */
  async getDomains(days: number): Promise<DomainRow[]> {
    const rows = await this.dataSource.query<Array<{
      domain_id: number;
      corp_id: number;
      corp_slug: string;
      hostname: string;
      role: string;
      visitors: string;
      pageviews: string;
    }>>(
      // As in getCorps, the time filter sits in the ON clause so domains
      // without recent events survive the LEFT JOIN.
      `
      SELECT
        d.id AS domain_id,
        d.corp_id AS corp_id,
        c.slug AS corp_slug,
        d.hostname AS hostname,
        d.role AS role,
        COUNT(DISTINCT e.visitor_id_daily) AS visitors,
        COUNT(e.id) AS pageviews
      FROM domains d
      JOIN corps c ON c.id = d.corp_id
      LEFT JOIN raw_events e
        ON e.domain_id = d.id
        AND e."timestamp" > NOW() - ($1 || ' days')::interval
      GROUP BY d.id, d.corp_id, c.slug, d.hostname, d.role
      ORDER BY pageviews DESC NULLS LAST, d.hostname ASC
      `,
      [days],
    );

    return rows.map((r) => ({
      domainId: Number(r.domain_id),
      corpId: Number(r.corp_id),
      corpSlug: r.corp_slug,
      hostname: r.hostname,
      role: r.role,
      visitors: Number(r.visitors ?? 0),
      pageviews: Number(r.pageviews ?? 0),
    }));
  }

  /**
   * Sankey edges: corp A → corp B transitions. For each visitor_id_daily,
   * events are walked in time order and every event is paired with the
   * corp_id of the event before it; pairs where the corp changed form an
   * edge. The edge weight is the number of DISTINCT visitor_id_daily values
   * that made the transition, so a visitor bouncing A → B several times
   * still contributes 1 to the A → B edge.
   *
   * Events with a NULL visitor_id_daily or corp_id are ignored, and edges
   * whose endpoints do not exist in `corps` are dropped by the inner joins.
   *
   * @param days Size of the look-back window, in days.
   * @returns Directed edges ordered by visitors, descending.
   */
  async getFlow(days: number): Promise<FlowEdge[]> {
    const rows = await this.dataSource.query<Array<{
      from_corp_id: number;
      from_slug: string;
      to_corp_id: number;
      to_slug: string;
      visitors: string;
    }>>(
      // `id` is a tiebreaker in the window ordering: rows sharing a timestamp
      // are peers whose relative order is otherwise unspecified, which would
      // make the direction of an edge vary between runs.
      `
      WITH ordered AS (
        SELECT
          visitor_id_daily,
          corp_id,
          "timestamp",
          LAG(corp_id) OVER (
            PARTITION BY visitor_id_daily
            ORDER BY "timestamp", id
          ) AS prev_corp_id
        FROM raw_events
        WHERE visitor_id_daily IS NOT NULL
          AND corp_id IS NOT NULL
          AND "timestamp" > NOW() - ($1 || ' days')::interval
      )
      SELECT
        prev_corp_id AS from_corp_id,
        cfrom.slug AS from_slug,
        corp_id AS to_corp_id,
        cto.slug AS to_slug,
        COUNT(DISTINCT visitor_id_daily) AS visitors
      FROM ordered
      JOIN corps cfrom ON cfrom.id = prev_corp_id
      JOIN corps cto ON cto.id = corp_id
      WHERE prev_corp_id IS NOT NULL
        AND prev_corp_id <> corp_id
      GROUP BY prev_corp_id, cfrom.slug, corp_id, cto.slug
      ORDER BY visitors DESC
      `,
      [days],
    );

    return rows.map((r) => ({
      fromCorpId: Number(r.from_corp_id),
      fromCorpSlug: r.from_slug,
      toCorpId: Number(r.to_corp_id),
      toCorpSlug: r.to_slug,
      visitors: Number(r.visitors),
    }));
  }

  /**
   * Symmetric overlap matrix: distinct visitors who touched BOTH corps in
   * the window (any order). Returns upper triangle only (corpA < corpB).
   *
   * Events with a NULL visitor_id_daily or corp_id are ignored. Pairs with
   * no shared visitor produce no row at all (there are no zero cells).
   *
   * @param days Size of the look-back window, in days.
   * @returns Overlap cells ordered by visitors, descending.
   */
  async getOverlap(days: number): Promise<OverlapCell[]> {
    const rows = await this.dataSource.query<Array<{
      corp_a_id: number;
      corp_a_slug: string;
      corp_b_id: number;
      corp_b_slug: string;
      visitors: string;
    }>>(
      // visitor_corps is the de-duplicated (visitor, corp) relation; the
      // self-join with a.corp_id < b.corp_id emits each unordered pair once.
      `
      WITH visitor_corps AS (
        SELECT DISTINCT visitor_id_daily, corp_id
        FROM raw_events
        WHERE visitor_id_daily IS NOT NULL
          AND corp_id IS NOT NULL
          AND "timestamp" > NOW() - ($1 || ' days')::interval
      )
      SELECT
        a.corp_id AS corp_a_id,
        ca.slug AS corp_a_slug,
        b.corp_id AS corp_b_id,
        cb.slug AS corp_b_slug,
        COUNT(DISTINCT a.visitor_id_daily) AS visitors
      FROM visitor_corps a
      JOIN visitor_corps b
        ON a.visitor_id_daily = b.visitor_id_daily
        AND a.corp_id < b.corp_id
      JOIN corps ca ON ca.id = a.corp_id
      JOIN corps cb ON cb.id = b.corp_id
      GROUP BY a.corp_id, ca.slug, b.corp_id, cb.slug
      ORDER BY visitors DESC
      `,
      [days],
    );

    return rows.map((r) => ({
      corpAId: Number(r.corp_a_id),
      corpASlug: r.corp_a_slug,
      corpBId: Number(r.corp_b_id),
      corpBSlug: r.corp_b_slug,
      visitors: Number(r.visitors),
    }));
  }
}
|