tv-anarchy/Sources/TVAnarchyCore/Library/LibraryScanner.swift
Natalie 92b38b1bae refactor(tv-anarchy): rename PlumTV→TVAnarchy and land session work
Renames Sources/PlumTV→TVAnarchy and PlumTVCore→TVAnarchyCore (the rename
the auto-commit service couldn't stage — it git-add'd the old, now-gone
paths and aborted every cycle), and commits the accumulated work:

- Library: black-built index fast path (LibraryIndex + scanFromIndex) with
  NFS-walk fallback; incremental --add on download-complete; mtime staleness
  gate; loose-file series-collapse fix; determinate scan/index progress.
- Cover art: keyless TVmaze cartoon-vs-live-action disambiguation (type/year).
- Player: sleep timer (timed + end-of-episode); visibility-gated polling.
- Home: Continue Watching cover art + live refresh; Recently Added; adult gate.
- Logs: multi-line selection + copy; truncated giant tx-list errors.
- Hover previews (opt-in) via black ffmpeg + scp.

Also gitignores foreign project trees (governor/mcp/fleet/recommender) that
sit in this directory but belong to their own repos.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 22:04:22 -07:00

373 lines
20 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import Foundation
/// Direct Swift scan of the media roots on plum — a faithful port of
/// plum-control-mcp's `media/library.ts` (SxxEyy parse, show-root bucketing,
/// release-noise name normalization). Runs when the NFS `~/media` mount is up;
/// the JSON snapshot covers the offline case.
public enum LibraryScanner {
/// File extensions treated as video. The walk lower-cases each file's path
/// extension before testing membership, so entries here are lower-case.
private static let videoExt: Set<String> = ["mkv", "mp4", "m4v", "avi", "mov", "webm"]
// Compiled once. `S(\d{1,2})E(\d{1,3})`, case-insensitive: capture group 1 is
// the season (1–2 digits), group 2 the episode (1–3 digits). The pattern is a
// fixed literal, so a `try!` failure here would be a programmer error.
private static let sxxeyy = try! NSRegularExpression(pattern: "S(\\d{1,2})E(\\d{1,3})",
options: [.caseInsensitive])
/// Media root directories to scan.
///
/// Reads the colon-separated `MEDIA_ROOTS` environment variable; when it is
/// unset or empty, falls back to the single default root `~/media`.
/// - Returns: The non-empty components of `MEDIA_ROOTS`, or the default root.
public static func mediaRoots() -> [String] {
    let configured = ProcessInfo.processInfo.environment["MEDIA_ROOTS"] ?? ""
    guard configured.isEmpty else {
        // `split` omits empty components by default, so "a::b" yields ["a", "b"].
        return configured.split(separator: ":").map { String($0) }
    }
    let home = FileManager.default.homeDirectoryForCurrentUser
    return [home.appendingPathComponent("media").path]
}
/// True when at least one media root is actually mounted and readable.
///
/// NOTE: a bare `fileExists()` is a FALSE POSITIVE — `~/media` is an autofs
/// trigger dir that exists (and stats as a directory) even when the NFS backend
/// isn't mounted, so when the automount has gone idle the scan would race on an
/// empty dir and silently fall back to the registry. `contentsOfDirectory`
/// both fires the automount and confirms real content; requiring a non-empty
/// listing means "trigger present but nothing mounted" reads as unavailable.
///
/// MUST be called off the main actor — it blocks while retrying: a cold access
/// triggers the autofs mount but can return before it is ready, so a single
/// try races it. Up to 5 probes, 0.4s apart, are made before giving up.
/// - Returns: `true` as soon as any root lists at least one entry.
public static func rootsAvailable() -> Bool {
    let fileManager = FileManager.default
    var probesLeft = 5
    while probesLeft > 0 {
        let mounted = mediaRoots().contains { root in
            guard let entries = try? fileManager.contentsOfDirectory(atPath: root) else { return false }
            return !entries.isEmpty
        }
        if mounted { return true }
        probesLeft -= 1
        // 0.4s — let the automount settle (no sleep after the final probe).
        if probesLeft > 0 { usleep(400_000) }
    }
    return false
}
/// Newest modification time among the category dirs (the direct, non-hidden
/// children of each media root) and the roots themselves.
///
/// A directory's mtime bumps when an entry is added/removed in it, so a new
/// show/season folder dropped by a finished download moves this forward —
/// letting `refreshIfStale` decide whether a full (minutes-long) rescan is even
/// warranted, with ~one readdir per root instead of walking 15k files.
/// - Returns: The latest mtime seen, or `nil` when no root could be listed.
public static func newestCategoryMTime() -> Date? {
    let fileManager = FileManager.default
    let mtimeKey = URLResourceKey.contentModificationDateKey
    var latest: Date?
    for rootPath in mediaRoots() {
        let rootURL = URL(fileURLWithPath: rootPath, isDirectory: true)
        // A root that cannot be listed contributes nothing (not even itself).
        guard let children = try? fileManager.contentsOfDirectory(
            at: rootURL, includingPropertiesForKeys: [mtimeKey],
            options: [.skipsHiddenFiles]) else { continue }
        for candidate in [rootURL] + children {
            guard let stamp = (try? candidate.resourceValues(forKeys: [mtimeKey]))?.contentModificationDate
            else { continue }
            if let current = latest, stamp <= current { continue }
            latest = stamp
        }
    }
    return latest
}
/// One video found on disk (or listed in the index).
///
/// `season`/`episode` are nil for non-episodic files (movies/clips). The
/// directory walk only fills in `size` for those (it is used to pick the main
/// file in a movie folder) — episodic files get 0 there; index rows carry the
/// size from the TSV for every file.
private struct FoundFile {
    /// Absolute local path of the video.
    let path: String
    /// File size in bytes, or 0 when not recorded.
    let size: Int64
    /// Season number parsed from the file name, if any.
    let season: Int?
    /// Episode number parsed from the file name, if any.
    let episode: Int?
    /// Modification time, when known.
    let mtime: Date?
}
/// Walks every media root on disk and returns the grouped library.
///
/// - Parameter onProgress: If given, called periodically with the running
///   count of directories read — a live, honest progress proxy for the UI (each
///   readdir is one NFS round-trip; there's no known total to make a
///   determinate %). The count is kept per root walk.
/// - Returns: Shows and movies built by the shared grouping step.
public static func scan(onProgress: ((Int) -> Void)? = nil) -> [CachedShow] {
    let fileManager = FileManager.default
    var filesByShow: [String: [FoundFile]] = [:]
    var rootByShow: [String: String] = [:]
    for root in mediaRoots() {
        // Skip roots that are missing or are not directories.
        var isDirectory: ObjCBool = false
        let exists = fileManager.fileExists(atPath: root, isDirectory: &isDirectory)
        if !exists || !isDirectory.boolValue { continue }

        let found = walkForVideos(rootURL: URL(fileURLWithPath: root, isDirectory: true),
                                  rootPath: root, maxDepth: 4, onProgress: onProgress)
        for file in found {
            let showKey = showRoot(for: file.path, mediaRoot: root)
            filesByShow[showKey, default: []].append(file)
            rootByShow[showKey] = root
        }
    }
    return group(grouped: filesByShow, mediaRootForKey: rootByShow)
}
/// Build the index TSV produced by black's `build_index.sh` (one
/// `size<TAB>mtime-epoch<TAB>path` line per video, black-side absolute paths)
/// into the same `CachedShow` list a local walk produces — running the
/// IDENTICAL grouping rules so there's no second source of truth. Paths are
/// remapped black → plum (via `MediaPaths.toLocal`) so VLC/frame-grab still see
/// local files. Lets a scan skip the minutes-long NFS walk entirely: black
/// builds the index out-of-band, plum just parses it.
///
/// Lines without exactly three tab-separated columns are skipped; an
/// unparseable size becomes 0 and an unparseable mtime becomes `nil`.
/// - Parameter tsv: The full index text, one record per `\n`-terminated line.
/// - Returns: Shows and movies built by the shared grouping step.
public static func scanFromIndex(_ tsv: String) -> [CachedShow] {
    let home = FileManager.default.homeDirectoryForCurrentUser
    let plumRoot = home.appendingPathComponent("media").path
    var filesByShow: [String: [FoundFile]] = [:]
    var rootByShow: [String: String] = [:]
    for line in tsv.split(separator: "\n") {
        // maxSplits: 2 keeps any further tabs inside the path column.
        let fields = line.split(separator: "\t", maxSplits: 2, omittingEmptySubsequences: false)
        if fields.count != 3 { continue }

        let localPath = MediaPaths.toLocal(String(fields[2]))
        let fileName = (localPath as NSString).lastPathComponent
        let parsed = parseSxxEyy(fileName)
        var modified: Date?
        if let epoch = Double(fields[1]) { modified = Date(timeIntervalSince1970: epoch) }

        let file = FoundFile(path: localPath, size: Int64(fields[0]) ?? 0,
                             season: parsed?.0, episode: parsed?.1, mtime: modified)
        let showKey = showRoot(for: localPath, mediaRoot: plumRoot)
        filesByShow[showKey, default: []].append(file)
        rootByShow[showKey] = plumRoot
    }
    return group(grouped: filesByShow, mediaRootForKey: rootByShow)
}
/// Shared post-gather step: turn grouped FoundFiles into shows (series vs movie),
/// dedup episodes, capture year + newest-mtime, merge split-season siblings, sort.
///
/// Keys are visited in sorted order rather than in `Dictionary` iteration
/// order, which is not stable between launches. The merge step keeps the FIRST
/// folder's `rootDir` for a merged series and `mergeEnrichment` is keyed by
/// `rootDir`, so an unstable visit order could change a show's identity from
/// one scan to the next.
/// - Parameters:
///   - grouped: Files bucketed by show-root key (see `showRoot`).
///   - mediaRootForKey: The media root each key was found under.
/// - Returns: Series and movies, merged by name and sorted by display name.
private static func group(grouped: [String: [FoundFile]],
                          mediaRootForKey: [String: String]) -> [CachedShow] {
    var out: [CachedShow] = []
    for key in grouped.keys.sorted() {
        guard let files = grouped[key] else { continue }
        let mediaRoot = mediaRootForKey[key] ?? ""
        let comps = componentsAfter(mediaRoot: mediaRoot, path: key)
        let cat = comps.first ?? ""
        // A series only when the key is a real show FOLDER (category/show, depth
        // 2). Loose files share the category-root key; one stray SxxEyy match
        // among them must NOT flip the whole pile into a single "series" and drop
        // the rest (this silently ate ~940 loose porn files). Those go to
        // movieItems, which makes each loose file its own movie.
        let isShowFolder = comps.count >= 2
        // Bind season and episode together so nothing is force-unwrapped.
        let episodic = files.compactMap { f -> (file: FoundFile, season: Int, episode: Int)? in
            guard let season = f.season, let episode = f.episode else { return nil }
            return (file: f, season: season, episode: episode)
        }
        guard isShowFolder, !episodic.isEmpty else {
            out.append(contentsOf: movieItems(key: key, files: files, mediaRoot: mediaRoot, category: cat))
            continue
        }
        var eps = episodic.map {
            CachedEpisode(path: $0.file.path, season: $0.season, episode: $0.episode,
                          label: episodeLabel($0.file.path))
        }
        eps.sort(by: episodeOrder)
        // Collapse duplicate episodes across releases (e.g. 1080p + 720p
        // both have S01E06) → one entry per season×episode. The first after
        // sort wins; alternate releases stay reachable via the player's
        // quality switcher. Episode numbers are at most 3 digits, so
        // season * 1000 + episode is collision-free.
        var seen = Set<Int>()
        eps = eps.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
        let year = episodic.compactMap { parseYear($0.file.path) }.min()
        out.append(CachedShow(name: normalizeShowName((key as NSString).lastPathComponent),
                              rootDir: key, category: cat, kind: .series, episodes: eps, year: year,
                              addedAt: files.compactMap(\.mtime).max()))
    }
    return mergeSeriesByName(out)
        .sorted { $0.name.localizedCaseInsensitiveCompare($1.name) == .orderedAscending }
}
/// Merge SERIES that share a normalized name + category but live in separate
/// top-level folders (e.g. `Bridgerton.S01`, `Bridgerton.S02`, `Bridgerton.S03`
/// as siblings → one "Bridgerton" with all seasons). Episodes are unioned and
/// deduped by season×episode; the earliest year, the newest `addedAt` and the
/// first rootDir win. Movies are left distinct.
/// - Parameter shows: Freshly grouped shows, in the order they should be considered.
/// - Returns: Movies and unmerged duplicates in input order, followed by the
///   merged series in first-seen order.
static func mergeSeriesByName(_ shows: [CachedShow]) -> [CachedShow] {
    var byKey: [String: CachedShow] = [:]
    var order: [String] = []
    var out: [CachedShow] = []
    for show in shows {
        guard show.kind == .series else { out.append(show); continue }
        // U+0001 separates category from name so the two cannot run together.
        let key = show.category + "\u{1}" + show.name.lowercased()
        guard var existing = byKey[key] else { byKey[key] = show; order.append(key); continue }
        // Merge ONLY when the season sets are disjoint — one show split across
        // sibling season folders (Bridgerton S01 / S02 / S03). Overlapping
        // seasons (both start at S01) mean two DIFFERENT shows that merely share
        // a name + category (e.g. an anime and its live-action remake) — keep
        // them as separate entries rather than interleaving their episodes.
        let haveSeasons = Set(existing.episodes.map(\.season))
        let newSeasons = Set(show.episodes.map(\.season))
        guard haveSeasons.isDisjoint(with: newSeasons) else { out.append(show); continue }
        existing.episodes += show.episodes
        existing.episodes.sort(by: episodeOrder)
        var seen = Set<Int>()
        existing.episodes = existing.episodes.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
        existing.year = [existing.year, show.year].compactMap { $0 }.min()
        // Keep the newest mtime across the merged folders; otherwise a season
        // added in a sibling folder would never move the show's `addedAt`.
        existing.addedAt = [existing.addedAt, show.addedAt].compactMap { $0 }.max()
        byKey[key] = existing
    }
    out.append(contentsOf: order.compactMap { byKey[$0] })
    return out
}
/// Turn a group of non-episodic videos into movie items.
///
/// A movie *folder* (key deeper than the category dir) yields one item — the
/// largest non-sample file. Loose files sitting directly under the category dir
/// each become their own item.
/// - Parameters:
///   - key: The group key (a movie folder or the category dir).
///   - files: Every video bucketed under `key`.
///   - mediaRoot: The media root `key` lives under.
///   - category: Category name recorded on each resulting item.
/// - Returns: One movie per loose file, or a single movie for a folder.
private static func movieItems(key: String, files: [FoundFile],
                               mediaRoot: String, category: String) -> [CachedShow] {
    // Prefer files that are not samples/extras; if that leaves nothing, use them all.
    let nonExtras = files.filter { !isSampleOrExtra($0.path) }
    let candidates = nonExtras.isEmpty ? files : nonExtras

    func makeMovie(file: String, title: String, rootDir: String, added: Date?) -> CachedShow {
        let entry = CachedEpisode(path: file, season: 0, episode: 0, label: title)
        return CachedShow(name: normalizeShowName(title), rootDir: rootDir, category: category,
                          kind: .movie, episodes: [entry],
                          year: parseYear(file) ?? parseYear(rootDir), addedAt: added)
    }

    let isLoose = componentsAfter(mediaRoot: mediaRoot, path: key).count <= 1
    guard !isLoose else {
        // Loose files at the category root → one movie each (rootDir = file).
        return candidates.map { file in
            let fileName = (file.path as NSString).lastPathComponent
            let title = (fileName as NSString).deletingPathExtension
            return makeMovie(file: file.path, title: title, rootDir: file.path, added: file.mtime)
        }
    }

    // Movie folder → the largest file represents it (rootDir = the folder);
    // addedAt is the newest mtime over ALL files in the folder.
    guard let largest = candidates.max(by: { $0.size < $1.size }) else { return [] }
    let newest = files.compactMap(\.mtime).max()
    return [makeMovie(file: largest.path, title: (key as NSString).lastPathComponent,
                      rootDir: key, added: newest)]
}
/// Strict ordering for episodes: by season first, then by episode number.
private static func episodeOrder(_ lhs: CachedEpisode, _ rhs: CachedEpisode) -> Bool {
    // Tuple comparison is lexicographic: season decides, episode breaks ties.
    (lhs.season, lhs.episode) < (rhs.season, rhs.episode)
}
/// Carry forward poster/overview from a prior snapshot onto a fresh scan,
/// keyed by rootDir, so a rescan never drops Phase-4 enrichment.
///
/// For a show whose `rootDir` exists in `previous`, `posterPath` and `overview`
/// are taken from the prior entry, and each episode whose path existed before
/// gets its prior non-nil `metaPath` back. Shows with no prior match pass
/// through unchanged. When `previous` repeats a rootDir, the first entry wins.
/// - Parameters:
///   - scanned: The fresh scan result.
///   - previous: The earlier snapshot to copy enrichment from.
/// - Returns: `scanned`, in the same order, with enrichment re-attached.
public static func mergeEnrichment(_ scanned: [CachedShow], from previous: [CachedShow]) -> [CachedShow] {
    let priorByRoot = Dictionary(previous.map { ($0.rootDir, $0) },
                                 uniquingKeysWith: { first, _ in first })
    var merged: [CachedShow] = []
    merged.reserveCapacity(scanned.count)
    for fresh in scanned {
        guard let old = priorByRoot[fresh.rootDir] else {
            merged.append(fresh)
            continue
        }
        var enriched = fresh
        enriched.posterPath = old.posterPath
        enriched.overview = old.overview
        // Re-attach per-episode metaPath by episode path.
        let metaByPath = Dictionary(old.episodes.map { ($0.path, $0.metaPath) },
                                    uniquingKeysWith: { first, _ in first })
        enriched.episodes = enriched.episodes.map { episode in
            guard let meta = metaByPath[episode.path] ?? nil else { return episode }
            var updated = episode
            updated.metaPath = meta
            return updated
        }
        merged.append(enriched)
    }
    return merged
}
// MARK: - walk
/// Iteratively walks `rootURL` and returns every video file found.
///
/// - Parameters:
///   - rootURL: Directory to start from.
///   - rootPath: String form of the root; emitted paths are built by appending to it.
///   - maxDepth: Directories are descended into only while the current depth
///     (root = 0) is below this value.
///   - onProgress: If given, called with the number of directories read so
///     far, once every 32 directories.
/// - Returns: One `FoundFile` per file whose extension is in `videoExt`.
private static func walkForVideos(rootURL: URL, rootPath: String, maxDepth: Int,
                                  onProgress: ((Int) -> Void)? = nil) -> [FoundFile] {
    struct Pending { let url: URL; let path: String; let depth: Int }

    let fileManager = FileManager.default
    // Prefetch is-dir + size + mtime with the directory read. Over NFS this is
    // the hot path: the old code did a separate `fileExists(isDirectory:)` stat
    // PER entry plus an `attributesOfItem` stat per movie file — two extra round
    // trips each. `contentsOfDirectory(at:includingPropertiesForKeys:)` batches
    // those attributes into the enumeration (readdirplus), and the values are
    // cached on the URL, so `resourceValues` below costs no further syscall.
    let prefetch: [URLResourceKey] = [.isDirectoryKey, .fileSizeKey, .contentModificationDateKey]
    let prefetchSet = Set(prefetch)

    var found: [FoundFile] = []
    var directoriesRead = 0
    var pending = [Pending(url: rootURL, path: rootPath, depth: 0)]
    while !pending.isEmpty {
        let dir = pending.removeLast()
        // Unreadable directories are skipped and do not count as read.
        guard let children = try? fileManager.contentsOfDirectory(
            at: dir.url, includingPropertiesForKeys: prefetch, options: [.skipsHiddenFiles]
        ) else { continue }
        directoriesRead += 1
        if directoriesRead.isMultiple(of: 32) { onProgress?(directoriesRead) }

        for child in children {
            let name = child.lastPathComponent
            // Emitted paths are STRINGS appended to `rootPath` rather than
            // `url.path`, because `contentsOfDirectory(at:)` canonicalizes
            // symlinks/APFS firmlinks (e.g. /var → /private/var) so `url.path`
            // would no longer be prefixed by the media root the rest of the
            // scan compares against (`componentsAfter`).
            let childPath = dir.path + "/" + name
            let values = try? child.resourceValues(forKeys: prefetchSet)
            if values?.isDirectory == true {
                if dir.depth < maxDepth {
                    pending.append(Pending(url: child, path: childPath, depth: dir.depth + 1))
                }
                continue
            }
            let ext = (name as NSString).pathExtension.lowercased()
            guard videoExt.contains(ext) else { continue }

            let modified = values?.contentModificationDate
            let episodeInfo = parseSxxEyy(name)
            // Non-episodic video (movie/clip) → size (prefetched, no extra
            // stat) lets a movie folder pick the main file over samples.
            // Episodic files record 0.
            let size: Int64 = episodeInfo == nil ? Int64(values?.fileSize ?? 0) : 0
            found.append(FoundFile(path: childPath, size: size,
                                   season: episodeInfo?.0, episode: episodeInfo?.1,
                                   mtime: modified))
        }
    }
    return found
}
/// Group key for an episode/file: the **top-level show folder** under the
/// category (`/media/tv/Psych`), collapsing every release + season subfolder
/// beneath it into ONE show. (Was: the release/season dir, which made Psych
/// appear once per release.) Files with fewer than three components below the
/// media root — e.g. loose files directly under the category — fall back to
/// their parent directory so `movieItems` splits them per-file.
/// - Parameters:
///   - filePath: Absolute path of the video file.
///   - mediaRoot: The media root the file was found under.
/// - Returns: `<mediaRoot>/<category>/<show>`, or the file's parent directory.
private static func showRoot(for filePath: String, mediaRoot: String) -> String {
    let below = componentsAfter(mediaRoot: mediaRoot, path: filePath)
    // <category>/<show>/…/<file> needs at least three components.
    guard below.count >= 3 else {
        return (filePath as NSString).deletingLastPathComponent
    }
    return [mediaRoot, below[0], below[1]].joined(separator: "/")
}
// MARK: - parsing
/// Extracts the first `SxxEyy` marker (case-insensitive) from a file name.
/// - Parameter name: File name to search, e.g. `Psych.S01E02.mkv`.
/// - Returns: `(season, episode)`, or `nil` when there is no marker or its
///   digits do not convert to `Int`.
public static func parseSxxEyy(_ name: String) -> (Int, Int)? {
    let whole = NSRange(name.startIndex..<name.endIndex, in: name)
    guard let match = sxxeyy.firstMatch(in: name, options: [], range: whole) else { return nil }
    // Group 1 is the season digits, group 2 the episode digits.
    guard let seasonRange = Range(match.range(at: 1), in: name),
          let episodeRange = Range(match.range(at: 2), in: name),
          let season = Int(name[seasonRange]),
          let episode = Int(name[episodeRange]) else { return nil }
    return (season, episode)
}
/// Display label for an episode: the file name without directory or extension.
private static func episodeLabel(_ path: String) -> String {
    let fileName = NSString(string: path).lastPathComponent
    return NSString(string: fileName).deletingPathExtension
}
/// Matches a standalone four-digit year starting with 19 or 20. Compiled once;
/// the pattern is a fixed literal.
private static let yearRe = try! NSRegularExpression(pattern: "\\b(19|20)\\d{2}\\b")
/// First 19xx/20xx year in the path — the release/air year for franchise order.
/// - Parameter path: Any path or name to search.
/// - Returns: The first matching year, or `nil` when none is present.
static func parseYear(_ path: String) -> Int? {
    let whole = NSRange(path.startIndex..<path.endIndex, in: path)
    guard let match = yearRe.firstMatch(in: path, options: [], range: whole) else { return nil }
    return Range(match.range, in: path).flatMap { Int(path[$0]) }
}
/// Path components of `path` below `mediaRoot` — e.g. `/media/movies/Inception`
/// under `/media` → `["movies", "Inception"]`. First element is the category.
///
/// The prefix must end on a path-separator boundary: `/media2/x` is NOT under
/// `/media`, so it yields `[]` rather than `["2", "x"]`. An empty `mediaRoot`
/// matches every path, and a trailing slash on `mediaRoot` is accepted.
/// - Parameters:
///   - mediaRoot: The root directory to strip.
///   - path: The path to split.
/// - Returns: The components below the root; `[]` when `path` is the root
///   itself or is not under it.
static func componentsAfter(mediaRoot: String, path: String) -> [String] {
    guard path.hasPrefix(mediaRoot) else { return [] }
    let rest = path.dropFirst(mediaRoot.count)
    // A bare string prefix is not enough: the remainder must start at a "/"
    // (or be empty), unless the root itself already ends the component.
    let onBoundary = mediaRoot.isEmpty || mediaRoot.hasSuffix("/")
        || rest.isEmpty || rest.hasPrefix("/")
    guard onBoundary else { return [] }
    // `split` omits empty components, which also drops leading/doubled slashes.
    return rest.split(separator: "/").map(String.init)
}
/// Sample reels, trailers and extras shouldn't represent a movie folder.
/// - Parameter path: Path of the video; only its last component is examined.
/// - Returns: `true` when the lower-cased file name contains a sample/extra
///   marker as a whole word.
static func isSampleOrExtra(_ path: String) -> Bool {
    let fileName = (path as NSString).lastPathComponent.lowercased()
    let markers = [
        "\\bsample\\b",
        "\\b(extras?|featurettes?|trailers?|behind[ ._-]the[ ._-]scenes)\\b",
    ]
    return markers.contains { matches(fileName, $0) }
}
/// Strip bracketed groups, year-and-after, release-noise-and-after, then tidy
/// separators. Mirrors `normalizeShowName` in library.ts.
/// - Parameter dirName: Raw folder or file name.
/// - Returns: The cleaned display name, or `dirName` unchanged when cleaning
///   would leave nothing.
public static func normalizeShowName(_ dirName: String) -> String {
    // Applied in order; each pass sees the output of the previous one.
    let passes: [(pattern: String, template: String)] = [
        ("\\[[^\\]]*\\]", " "),                 // [bracketed] groups
        ("\\([^)]*\\)", " "),                   // (parenthesized) groups
        ("\\b(19|20)\\d{2}\\b.*$", ""),         // first year and everything after
        ("\\b(season\\s*\\d+|s\\d{1,2}|complete|series|repack|bluray|webrip|web-dl|hdtv|dvdrip|x264|x265|h\\.?26[45]|hevc|1080p|720p|480p|tvrip|extras?|batch|commentary)\\b.*$", ""),
        ("[._-]+", " "),                        // separators to spaces
        ("\\s+", " "),                          // collapse whitespace runs
    ]
    let collapsed = passes.reduce(dirName) { partial, pass in
        replace(partial, pass.pattern, pass.template)
    }
    let tidy = collapsed.trimmingCharacters(in: .whitespaces)
    return tidy.isEmpty ? dirName : tidy
}
// MARK: - regex helpers
/// Whether `s` contains a case-insensitive match for the regex `pattern`.
private static func matches(_ s: String, _ pattern: String) -> Bool {
    let hit = s.range(of: pattern, options: [.regularExpression, .caseInsensitive])
    return hit != nil
}
/// Replaces every case-insensitive match of the regex `pattern` in `s` with
/// the template `with`. Returns `s` unchanged when the pattern fails to compile.
private static func replace(_ s: String, _ pattern: String, _ with: String) -> String {
    let regex: NSRegularExpression
    do {
        regex = try NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
    } catch {
        return s
    }
    let whole = NSRange(s.startIndex..<s.endIndex, in: s)
    return regex.stringByReplacingMatches(in: s, options: [], range: whole, withTemplate: with)
}
}