tv-anarchy/Sources/TVAnarchyCore/Library/LibraryScanner.swift

383 lines
21 KiB
Swift
Raw Normal View History

import Foundation
/// Direct Swift scan of local media roots — a faithful port of tv-anarchy-mcp's
/// `media/library.ts` (SxxEyy parse, show-root bucketing, release-noise name
/// normalization). The primary library source is black's prebuilt index (see
/// `scanFromIndex`); this local walk only covers a configured, media-tree-structured
/// `MEDIA_ROOTS` dir. It never walks the old NFS `~/media` mount — this project does
/// not depend on NFS. (Loose downloaded files are matched separately by
/// `DownloadsIndex`, by filename.)
public enum LibraryScanner {
/// Lower-cased file extensions that the directory walk treats as video files.
private static let videoExt: Set<String> = [
    "mkv", "mp4", "m4v", "avi", "mov", "webm",
]
/// Season/episode marker matcher, compiled once: `S(\d{1,2})E(\d{1,3})`,
/// case-insensitive. The pattern is a constant, so `try!` can only trip on a
/// programmer error.
private static let sxxeyy = try! NSRegularExpression(
    pattern: "S(\\d{1,2})E(\\d{1,3})",
    options: [.caseInsensitive]
)
/// Roots for the offline library walk, laid out as `<root>/<category>/<show>/`.
///
/// Read from the colon-separated `MEDIA_ROOTS` environment variable; empty
/// segments are dropped. When the variable is unset or empty the result is an
/// empty list — there is no `~/media` default. (Distinct from `DownloadsIndex`,
/// which matches loose downloaded files by name and is NOT tree-structured.)
///
/// - Returns: The configured root paths, in the order they were listed.
public static func mediaRoots() -> [String] {
    guard let raw = ProcessInfo.processInfo.environment["MEDIA_ROOTS"], !raw.isEmpty else {
        return []
    }
    return raw.components(separatedBy: ":").filter { !$0.isEmpty }
}
/// Reports whether at least one configured root can be listed and is non-empty.
///
/// A single directory read per root is enough (there is no autofs mount to
/// coax now that `~/media` is gone). MUST be called off the main actor — a
/// directory read can block on slow disks.
///
/// - Returns: `true` as soon as one root yields a non-empty listing; `false`
///   when every root is missing, unreadable, or empty.
public static func rootsAvailable() -> Bool {
    let fileManager = FileManager.default
    for root in mediaRoots() {
        // An unreadable or missing root simply doesn't count.
        guard let entries = try? fileManager.contentsOfDirectory(atPath: root) else { continue }
        if !entries.isEmpty { return true }
    }
    return false
}
/// One video file discovered by a walk or parsed from the index.
///
/// `season`/`episode` are nil for non-episodic files (movies/clips). The local
/// walk only fills in `size` for those files (it is used to pick the main file
/// in a movie folder) — episodic files found by the walk carry `size == 0`.
private struct FoundFile {
    /// Path of the file, as emitted by the producer (walk or index).
    let path: String
    /// File size in bytes, or 0 when it was not captured.
    let size: Int64
    /// Season number parsed from the file name, if any.
    let season: Int?
    /// Episode number parsed from the file name, if any.
    let episode: Int?
    /// Content modification date, when known.
    let mtime: Date?
}
/// Walks every configured media root and builds the show list from what is on disk.
///
/// Roots that do not exist or are not directories are skipped. Each root is
/// walked to a depth of 4 and the files found are bucketed by their show root
/// before the shared grouping step runs.
///
/// - Parameter onProgress: If given, called periodically with the running
///   count of directories read in the root currently being walked — a live,
///   honest progress proxy for the UI (there is no known total to make a
///   determinate percentage).
/// - Returns: The grouped, merged and name-sorted shows.
public static func scan(onProgress: ((Int) -> Void)? = nil) -> [CachedShow] {
    let fileManager = FileManager.default
    var filesByShowRoot: [String: [FoundFile]] = [:]
    var rootByShowRoot: [String: String] = [:]

    for root in mediaRoots() {
        var isDirectory: ObjCBool = false
        let exists = fileManager.fileExists(atPath: root, isDirectory: &isDirectory)
        if !exists || !isDirectory.boolValue { continue }

        let found = walkForVideos(rootURL: URL(fileURLWithPath: root, isDirectory: true),
                                  rootPath: root, maxDepth: 4, onProgress: onProgress)
        for file in found {
            let showKey = showRoot(for: file.path, mediaRoot: root)
            filesByShowRoot[showKey, default: []].append(file)
            rootByShowRoot[showKey] = root
        }
    }
    return group(grouped: filesByShowRoot, mediaRootForKey: rootByShowRoot)
}
/// Turns the index TSV produced by black's `build_index.sh` into the same
/// `CachedShow` list a local walk produces.
///
/// Each line is `size<TAB>mtime-epoch<TAB>path` for one video, with black-side
/// absolute paths. The rows go through the IDENTICAL grouping rules as the
/// local walk, so there is no second source of truth. Paths are kept
/// black-side (the canonical identity); at launch a downloaded copy plays on
/// plum's VLC and anything else routes to black (mpv) — see
/// `PlayerController`. This is the primary, NFS-free library path: black
/// builds the index out-of-band, plum just parses it.
///
/// Malformed rows (fewer than three columns, or an empty path) are skipped. An
/// unparsable size becomes 0 and an unparsable mtime becomes nil.
///
/// - Parameter tsv: The raw index text; LF and CRLF line endings are accepted.
/// - Returns: The grouped, merged and name-sorted shows.
public static func scanFromIndex(_ tsv: String) -> [CachedShow] {
    let root = MediaPaths.remoteRoot
    var grouped: [String: [FoundFile]] = [:]
    var mediaRootForKey: [String: String] = [:]
    // "\r\n" is ONE Character in Swift and does not compare equal to "\n", so
    // splitting on "\n" alone would leave a CRLF file as a single giant line.
    let lines = tsv.split(whereSeparator: { $0 == "\n" || $0 == "\r\n" })
    for raw in lines {
        // maxSplits keeps any tab inside the path column intact.
        let cols = raw.split(separator: "\t", maxSplits: 2, omittingEmptySubsequences: false)
        // An empty path would otherwise surface as a nameless movie entry.
        guard cols.count == 3, !cols[2].isEmpty else { continue }
        let size = Int64(cols[0]) ?? 0
        let mtime = Double(cols[1]).map { Date(timeIntervalSince1970: $0) }
        let path = String(cols[2])
        let name = (path as NSString).lastPathComponent
        let se = parseSxxEyy(name)
        let f = FoundFile(path: path, size: size, season: se?.0, episode: se?.1, mtime: mtime)
        let key = showRoot(for: path, mediaRoot: root)
        grouped[key, default: []].append(f)
        mediaRootForKey[key] = root
    }
    return group(grouped: grouped, mediaRootForKey: mediaRootForKey)
}
/// Shared post-gather step: turns grouped `FoundFile`s into shows.
///
/// For each show-root key this decides series vs movie, dedups episodes,
/// captures the year and the newest mtime, then merges split-season siblings
/// and sorts the result by name.
///
/// - Parameters:
///   - grouped: Files bucketed by show-root key.
///   - mediaRootForKey: The media root each key was found under; a missing
///     entry is treated as an empty root.
/// - Returns: The merged shows, sorted case-insensitively by name.
private static func group(grouped: [String: [FoundFile]],
                          mediaRootForKey: [String: String]) -> [CachedShow] {
    var out: [CachedShow] = []
    // Dictionary iteration order is unspecified and can differ between runs.
    // The series merge below is first-wins (the first folder's rootDir
    // survives), so visit keys in sorted order to keep the result stable.
    for (key, files) in grouped.sorted(by: { $0.key < $1.key }) {
        let mediaRoot = mediaRootForKey[key] ?? ""
        let comps = componentsAfter(mediaRoot: mediaRoot, path: key)
        let cat = comps.first ?? ""
        // A series only when the key is a real show FOLDER (category/show, depth
        // 2). Loose files share the category-root key; one stray SxxEyy match
        // among them must NOT flip the whole pile into a single "series" and drop
        // the rest (this silently ate ~940 loose files). Those go to movieItems,
        // which makes each loose file its own movie.
        let isShowFolder = comps.count >= 2
        let episodic = files.filter { $0.season != nil }
        if isShowFolder, !episodic.isEmpty {
            let showName = normalizeShowName((key as NSString).lastPathComponent)
            // Bind season/episode instead of force-unwrapping: a file missing
            // either number is skipped rather than crashing the scan.
            var eps = episodic.compactMap { f -> CachedEpisode? in
                guard let season = f.season, let episode = f.episode else { return nil }
                let resolved = LibraryDisplayNames.resolve(showName: showName, path: f.path)
                return CachedEpisode(path: f.path, season: season, episode: episode,
                                     label: resolved.displayName,
                                     displayName: resolved.displayName,
                                     episodeTitle: resolved.episodeTitle)
            }
            eps.sort(by: episodeOrder)
            // Collapse duplicate episodes across releases (e.g. 1080p + 720p
            // both have S01E06) — one entry per season×episode. The first after
            // sort wins; alternate releases stay reachable via the player's
            // quality switcher. Episode numbers are at most three digits, so
            // season * 1000 + episode is collision-free.
            var seen = Set<Int>()
            eps = eps.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
            let year = episodic.compactMap { parseYear($0.path) }.min()
            out.append(CachedShow(name: showName,
                                  rootDir: key, category: cat, kind: .series, episodes: eps, year: year,
                                  addedAt: files.compactMap(\.mtime).max()))
        } else {
            out.append(contentsOf: movieItems(key: key, files: files, mediaRoot: mediaRoot, category: cat))
        }
    }
    return mergeSeriesByName(out)
        .sorted { $0.name.localizedCaseInsensitiveCompare($1.name) == .orderedAscending }
}
/// Folds freshly-scanned show(s) into an existing library snapshot.
///
/// Shows are matched by `rootDir`. A match unions the episodes (deduped by
/// season×episode, first after sort wins), keeps the earliest year and the
/// newest `addedAt`; an unmatched show is added as-is. The combined list then
/// goes through the same series-by-name merge as a full scan.
///
/// - Parameters:
///   - incoming: Newly scanned shows.
///   - existing: The current snapshot; on duplicate `rootDir`s the first wins.
/// - Returns: The merged shows, sorted case-insensitively by name.
public static func mergeIngest(_ incoming: [CachedShow], into existing: [CachedShow]) -> [CachedShow] {
    var byRoot = Dictionary(existing.map { ($0.rootDir, $0) }, uniquingKeysWith: { a, _ in a })
    for show in incoming {
        if var prev = byRoot[show.rootDir] {
            prev.episodes.append(contentsOf: show.episodes)
            prev.episodes.sort(by: episodeOrder)
            var seen = Set<Int>()
            prev.episodes = prev.episodes.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
            prev.year = [prev.year, show.year].compactMap { $0 }.min()
            if let added = show.addedAt { prev.addedAt = max(prev.addedAt ?? added, added) }
            byRoot[show.rootDir] = prev
        } else {
            byRoot[show.rootDir] = show
        }
    }
    // Dictionary value order is unspecified, and the series merge keeps the
    // FIRST show it sees for a name. Feed it in rootDir order so the surviving
    // entry is the same on every run.
    let ordered = byRoot.values.sorted { $0.rootDir < $1.rootDir }
    return mergeSeriesByName(ordered)
        .sorted { $0.name.localizedCaseInsensitiveCompare($1.name) == .orderedAscending }
}
/// Merges SERIES that share a normalized name + category but live in separate
/// top-level folders (e.g. `Bridgerton.S01`, `Bridgerton.S02`, `Bridgerton.S03`
/// as siblings → one "Bridgerton" with all seasons).
///
/// Episodes are unioned and deduped by season×episode; the earliest year, the
/// newest `addedAt` and the first show's remaining fields (including
/// `rootDir`) win. Movies are left distinct.
///
/// - Parameter shows: Shows to merge; input order decides which entry is "first".
/// - Returns: Movies and un-mergeable series in encounter order, followed by
///   the merged series in first-seen order.
static func mergeSeriesByName(_ shows: [CachedShow]) -> [CachedShow] {
    var byKey: [String: CachedShow] = [:]
    var order: [String] = []
    var out: [CachedShow] = []
    for show in shows {
        guard show.kind == .series else { out.append(show); continue }
        // U+0001 separates category from name in the composite key.
        let key = show.category + "\u{1}" + show.name.lowercased()
        guard var existing = byKey[key] else { byKey[key] = show; order.append(key); continue }
        // Merge ONLY when the season sets are disjoint — one show split across
        // sibling season folders (Bridgerton S01 / S02 / S03). Overlapping
        // seasons (both start at S01) mean two DIFFERENT shows that merely share
        // a name + category (e.g. an anime and its live-action remake) — keep
        // them as separate entries rather than interleaving their episodes.
        let haveSeasons = Set(existing.episodes.map(\.season))
        let newSeasons = Set(show.episodes.map(\.season))
        guard haveSeasons.isDisjoint(with: newSeasons) else { out.append(show); continue }
        existing.episodes += show.episodes
        existing.episodes.sort(by: episodeOrder)
        var seen = Set<Int>()
        existing.episodes = existing.episodes.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
        existing.year = [existing.year, show.year].compactMap { $0 }.min()
        // The merged show is as new as its newest folder; without this a freshly
        // added season would not move the show's `addedAt`.
        if let added = show.addedAt { existing.addedAt = max(existing.addedAt ?? added, added) }
        byKey[key] = existing
    }
    out.append(contentsOf: order.compactMap { byKey[$0] })
    return out
}
/// Converts one group of files into movie items.
///
/// A movie *folder* (key deeper than the category dir) yields a single item,
/// represented by its largest file. Loose files sitting directly under the
/// category dir each become their own item. Samples/extras are ignored unless
/// the group contains nothing else.
///
/// - Parameters:
///   - key: The group's show-root key.
///   - files: Every file bucketed under `key`.
///   - mediaRoot: The media root `key` lives under.
///   - category: Category name stamped on each produced item.
/// - Returns: The movie items; empty when there are no files.
private static func movieItems(key: String, files: [FoundFile],
                               mediaRoot: String, category: String) -> [CachedShow] {
    let withoutExtras = files.filter { !isSampleOrExtra($0.path) }
    let candidates = withoutExtras.isEmpty ? files : withoutExtras
    let isMovieFolder = componentsAfter(mediaRoot: mediaRoot, path: key).count > 1

    /// Builds a single-"episode" movie entry; the year comes from the file
    /// path first, then from the item root.
    func movie(path: String, name: String, root: String, addedAt: Date?) -> CachedShow {
        let only = CachedEpisode(path: path, season: 0, episode: 0, label: name)
        return CachedShow(name: normalizeShowName(name), rootDir: root, category: category, kind: .movie,
                          episodes: [only],
                          year: parseYear(path) ?? parseYear(root), addedAt: addedAt)
    }

    guard isMovieFolder else {
        // Loose files at the category root — one movie each (rootDir = file).
        return candidates.map { file in
            let fileName = (file.path as NSString).lastPathComponent
            let title = (fileName as NSString).deletingPathExtension
            return movie(path: file.path, name: title, root: file.path, addedAt: file.mtime)
        }
    }

    // Movie folder — the largest file represents it (rootDir = the folder).
    guard let main = candidates.max(by: { $0.size < $1.size }) else { return [] }
    let newest = files.compactMap(\.mtime).max()
    return [movie(path: main.path, name: (key as NSString).lastPathComponent, root: key,
                  addedAt: newest)]
}
/// Strict ordering for episodes: by season first, then by episode number.
///
/// - Returns: `true` when `lhs` should sort before `rhs`.
private static func episodeOrder(_ lhs: CachedEpisode, _ rhs: CachedEpisode) -> Bool {
    // Tuple comparison is lexicographic: season decides, episode breaks ties.
    (lhs.season, lhs.episode) < (rhs.season, rhs.episode)
}
/// Carries poster/overview forward from a prior snapshot onto a fresh scan,
/// then backfills anything still missing from the `.meta` cache.
///
/// Keys are normalized to black-side form so enrichment survives the
/// legacy-plum → black-side path switch (a raw `rootDir` match would silently
/// drop every poster). The `.meta` cache is the durable record: re-folding it
/// here means one bad snapshot can never lose artwork permanently.
///
/// - Parameters:
///   - scanned: The freshly scanned shows.
///   - previous: The earlier snapshot; on duplicate normalized roots the first wins.
/// - Returns: `scanned`, in the same order, with enrichment applied.
public static func mergeEnrichment(_ scanned: [CachedShow], from previous: [CachedShow]) -> [CachedShow] {
    let priorByRoot = Dictionary(previous.map { (MediaPaths.toRemote($0.rootDir), $0) },
                                 uniquingKeysWith: { first, _ in first })
    var result: [CachedShow] = []
    result.reserveCapacity(scanned.count)

    for fresh in scanned {
        var merged = fresh
        let remoteRoot = MediaPaths.toRemote(fresh.rootDir)

        if let old = priorByRoot[remoteRoot] {
            merged.posterPath = old.posterPath
            merged.overview = old.overview
            // Re-attach per-episode metaPath by (normalized) episode path.
            let metaByPath = Dictionary(old.episodes.map { (MediaPaths.toRemote($0.path), $0.metaPath) },
                                        uniquingKeysWith: { first, _ in first })
            merged.episodes = merged.episodes.map { episode in
                // `?? nil` flattens "no entry" and "entry without a metaPath".
                guard let meta = metaByPath[MediaPaths.toRemote(episode.path)] ?? nil else {
                    return episode
                }
                var updated = episode
                updated.metaPath = meta
                return updated
            }
        }

        // Only hit the cache when something is actually missing.
        let needsBackfill = merged.posterPath == nil || merged.overview == nil
        if needsBackfill, let cached = MetaWriter.loadCache(forPath: remoteRoot) {
            if merged.posterPath == nil { merged.posterPath = cached.posterURL }
            if merged.overview == nil { merged.overview = cached.overview }
        }
        result.append(merged)
    }
    return result
}
// MARK: - walk
/// Depth-limited, iterative walk that collects every video file under a root.
///
/// - Parameters:
///   - rootURL: URL of the root directory to list.
///   - rootPath: The root as the caller spelled it; emitted paths are built on it.
///   - maxDepth: Directories deeper than this many levels below the root are
///     not entered.
///   - onProgress: If given, called with the running count of directories
///     read, once every 32 directories.
/// - Returns: The videos found. Episodic files carry season/episode and a size
///   of 0; other videos carry their file size.
private static func walkForVideos(rootURL: URL, rootPath: String, maxDepth: Int,
                                  onProgress: ((Int) -> Void)? = nil) -> [FoundFile] {
    let fm = FileManager.default
    var found: [FoundFile] = []
    var dirsVisited = 0
    // Prefetch is-dir + size + mtime with the directory read. Over a network
    // filesystem this is the hot path: the old code did a separate
    // `fileExists(isDirectory:)` stat PER entry plus an `attributesOfItem` stat
    // per movie file — two extra round trips each. The intent of
    // `contentsOfDirectory(at:includingPropertiesForKeys:)` is to batch those
    // attributes into the enumeration so that `resourceValues` below is served
    // from values cached on the URL.
    //
    // Emitted paths are built as STRINGS appended to `rootPath` rather than
    // read from `url.path`, because `contentsOfDirectory(at:)` canonicalizes
    // symlinks/APFS firmlinks (e.g. /var → /private/var), so `url.path` would
    // no longer be prefixed by the media root the rest of the scan compares
    // against (`componentsAfter`). Appending to `rootPath` keeps the prefix.
    let keys: [URLResourceKey] = [.isDirectoryKey, .fileSizeKey, .contentModificationDateKey]
    let keySet = Set(keys)
    var pending: [(url: URL, path: String, depth: Int)] = [(rootURL, rootPath, 0)]

    while let dir = pending.popLast() {
        let listing: [URL]
        do {
            listing = try fm.contentsOfDirectory(
                at: dir.url, includingPropertiesForKeys: keys, options: [.skipsHiddenFiles]
            )
        } catch {
            // Unreadable directory: skip it and keep walking.
            continue
        }
        dirsVisited += 1
        if dirsVisited.isMultiple(of: 32) { onProgress?(dirsVisited) }

        for entry in listing {
            let name = entry.lastPathComponent
            let fullPath = dir.path + "/" + name
            let values = try? entry.resourceValues(forKeys: keySet)

            if values?.isDirectory == true {
                if dir.depth < maxDepth { pending.append((entry, fullPath, dir.depth + 1)) }
                continue
            }

            let ext = (name as NSString).pathExtension.lowercased()
            guard videoExt.contains(ext) else { continue }

            let marker = parseSxxEyy(name)
            // Only a non-episodic video (movie/clip) needs its size: it lets a
            // movie folder pick the main file over samples.
            let size: Int64 = marker == nil ? Int64(values?.fileSize ?? 0) : 0
            found.append(FoundFile(path: fullPath, size: size,
                                   season: marker?.0, episode: marker?.1,
                                   mtime: values?.contentModificationDate))
        }
    }
    return found
}
/// Group key for an episode/file: the **top-level show folder** under the
/// category (e.g. `/media/tv/Psych`), collapsing every release + season
/// subfolder beneath it into ONE show. (Previously the key was the
/// release/season dir, which made Psych appear once per release.)
///
/// Files with fewer than three components below the media root — loose files
/// directly under the category, for instance — fall back to their containing
/// directory so `movieItems` can split them per file.
///
/// - Parameters:
///   - filePath: Path of the video file.
///   - mediaRoot: The media root the file was found under.
/// - Returns: The key the file is bucketed under.
private static func showRoot(for filePath: String, mediaRoot: String) -> String {
    let parts = componentsAfter(mediaRoot: mediaRoot, path: filePath)
    // Needs at least <category>/<show>/…/<file> to have a show folder.
    guard parts.count >= 3 else {
        return (filePath as NSString).deletingLastPathComponent
    }
    return [mediaRoot, parts[0], parts[1]].joined(separator: "/")
}
// MARK: - parsing
/// Extracts the first `SxxEyy` season/episode marker from a file name.
///
/// - Parameter name: The file name (or any string) to search.
/// - Returns: `(season, episode)` for the first match, or nil when there is no
///   marker or its digits cannot be read as integers.
public static func parseSxxEyy(_ name: String) -> (Int, Int)? {
    let whole = NSRange(name.startIndex..<name.endIndex, in: name)
    guard let match = sxxeyy.firstMatch(in: name, options: [], range: whole) else { return nil }
    // Capture group 1 is the season digits, group 2 the episode digits.
    guard let seasonRange = Range(match.range(at: 1), in: name),
          let episodeRange = Range(match.range(at: 2), in: name) else { return nil }
    guard let season = Int(name[seasonRange]),
          let episode = Int(name[episodeRange]) else { return nil }
    return (season, episode)
}
/// The file name of `path` with its extension removed.
private static func episodeLabel(_ path: String) -> String {
    let fileName = NSString(string: path).lastPathComponent
    return NSString(string: fileName).deletingPathExtension
}
/// Matches a standalone four-digit year beginning with 19 or 20.
private static let yearRe = try! NSRegularExpression(pattern: "\\b(19|20)\\d{2}\\b")
/// First 19xx/20xx year in the path — the release/air year for franchise order.
///
/// - Parameter path: Any path or name to search.
/// - Returns: The first matching year, or nil when the path contains none.
static func parseYear(_ path: String) -> Int? {
    let whole = NSRange(path.startIndex..<path.endIndex, in: path)
    guard let hit = yearRe.firstMatch(in: path, options: [], range: whole) else { return nil }
    return Range(hit.range, in: path).flatMap { Int(path[$0]) }
}
/// Path components of `path` below `mediaRoot` — e.g. `/media/movies/Inception`
/// under `/media` → `["movies", "Inception"]`. The first element is the category.
///
/// The root must match on a path-component boundary: `/media2/tv` is NOT under
/// `/media`. A trailing slash on `mediaRoot` is accepted, and empty components
/// (doubled slashes) are ignored.
///
/// - Parameters:
///   - mediaRoot: The root directory; an empty root matches every path.
///   - path: The path to break down.
/// - Returns: The components below the root, or `[]` when `path` is the root
///   itself or does not live under it.
static func componentsAfter(mediaRoot: String, path: String) -> [String] {
    guard path.hasPrefix(mediaRoot) else { return [] }
    let rest = path.dropFirst(mediaRoot.count)
    // A bare string prefix also matches sibling directories that merely share
    // leading characters (root "/media", path "/media2/..."). Require the
    // prefix to end where a path component ends.
    let endsOnBoundary = mediaRoot.isEmpty
        || mediaRoot.hasSuffix("/")
        || rest.isEmpty
        || rest.hasPrefix("/")
    guard endsOnBoundary else { return [] }
    // `split` drops empty pieces, which also absorbs leading/doubled slashes.
    return rest.split(separator: "/").map(String.init)
}
/// Whether a file looks like a sample reel, trailer, featurette or other extra
/// — such files shouldn't represent a movie folder.
///
/// Only the lower-cased file name (last path component) is inspected.
///
/// - Parameter path: Path of the candidate file.
/// - Returns: `true` when the name contains one of the marker words as a whole word.
static func isSampleOrExtra(_ path: String) -> Bool {
    let fileName = (path as NSString).lastPathComponent.lowercased()
    let markers = [
        "\\bsample\\b",
        "\\b(extras?|featurettes?|trailers?|behind[ ._-]the[ ._-]scenes)\\b",
    ]
    return markers.contains { pattern in
        fileName.range(of: pattern, options: [.regularExpression, .caseInsensitive]) != nil
    }
}
/// Reduces a release-style directory name to a clean show name.
///
/// In order: strips bracketed and parenthesized groups, cuts everything from
/// the first year onward, cuts everything from the first release-noise token
/// onward, turns separator runs into spaces, then collapses and trims
/// whitespace. Mirrors `normalizeShowName` in library.ts.
///
/// - Parameter dirName: The raw folder (or file base) name.
/// - Returns: The cleaned name, or `dirName` unchanged if cleaning leaves nothing.
public static func normalizeShowName(_ dirName: String) -> String {
    // Applied top to bottom; order matters because later steps see earlier output.
    let steps: [(pattern: String, template: String)] = [
        ("\\[[^\\]]*\\]", " "),
        ("\\([^)]*\\)", " "),
        ("\\b(19|20)\\d{2}\\b.*$", ""),
        ("\\b(season\\s*\\d+|s\\d{1,2}|complete|series|repack|bluray|webrip|web-dl|hdtv|dvdrip|x264|x265|h\\.?26[45]|hevc|1080p|720p|480p|tvrip|extras?|batch|commentary)\\b.*$", ""),
        ("[._-]+", " "),
        ("\\s+", " "),
    ]
    let cleaned = steps
        .reduce(dirName) { partial, step in replace(partial, step.pattern, step.template) }
        .trimmingCharacters(in: .whitespaces)
    return cleaned.isEmpty ? dirName : cleaned
}
// MARK: - regex helpers
/// Whether `s` contains a match for the regular expression `pattern`,
/// ignoring case.
private static func matches(_ s: String, _ pattern: String) -> Bool {
    let options: String.CompareOptions = [.regularExpression, .caseInsensitive]
    guard s.range(of: pattern, options: options) != nil else { return false }
    return true
}
/// Replaces every case-insensitive match of `pattern` in `s` with the template
/// `with`.
///
/// - Returns: The rewritten string, or `s` unchanged when `pattern` does not compile.
private static func replace(_ s: String, _ pattern: String, _ with: String) -> String {
    let regex: NSRegularExpression
    do {
        regex = try NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
    } catch {
        // Best-effort: an invalid pattern leaves the input untouched.
        return s
    }
    let whole = NSRange(s.startIndex..<s.endIndex, in: s)
    return regex.stringByReplacingMatches(in: s, options: [], range: whole, withTemplate: with)
}
}