tv-anarchy/Sources/TVAnarchyCore/Metadata/FilenameParser.swift
Natalie 92b38b1bae refactor(tv-anarchy): rename PlumTV→TVAnarchy and land session work
Renames Sources/PlumTV→TVAnarchy and PlumTVCore→TVAnarchyCore (the rename
the auto-commit service couldn't stage — it git-add'd the old, now-gone
paths and aborted every cycle), and commits the accumulated work:

- Library: black-built index fast path (LibraryIndex + scanFromIndex) with
  NFS-walk fallback; incremental --add on download-complete; mtime staleness
  gate; loose-file series-collapse fix; determinate scan/index progress.
- Cover art: keyless TVmaze cartoon-vs-live-action disambiguation (type/year).
- Player: sleep timer (timed + end-of-episode); visibility-gated polling.
- Home: Continue Watching cover art + live refresh; Recently Added; adult gate.
- Logs: multi-line selection + copy; truncated giant tx-list errors.
- Hover previews (opt-in) via black ffmpeg + scp.

Also gitignores foreign project trees (governor/mcp/fleet/recommender) that
sit in this directory but belong to their own repos.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 22:04:22 -07:00

89 lines
4 KiB
Swift

import Foundation
/// Regex-first filename → structured fields. This is the deterministic core of
/// the metadata pipeline (handles the overwhelming majority of releases); an
/// MLX `TitleRefiner` is consulted only for the messy tail (see `refiner`).
public enum FilenameParser {
    /// Optional model-backed refiner for titles regex can't cleanly extract.
    /// `nil` by default, so the regex path stands alone. MLX plugs in here.
    // NOTE(review): unsynchronized mutable static; presumably assigned once at
    // startup before any parsing happens — confirm against callers.
    public static var refiner: (any TitleRefiner)?

    // Token edges. `\b` is deliberately avoided: regex word characters include
    // `_`, so `\b` finds no boundary inside `Show_2019_1080p`, even though the
    // title tidy-up below treats `_` as a separator just like `.` and `-`.
    private static let leadingEdge = "(?<![\\p{L}\\p{M}\\p{N}])"
    private static let trailingEdge = "(?![\\p{L}\\p{M}\\p{N}])"

    /// Wraps `body` so it only matches as a standalone token, i.e. not glued
    /// to a letter, combining mark or digit on either side. The wrapper is
    /// non-capturing, so capture-group numbering inside `body` is unchanged.
    private static func token(_ body: String) -> String {
        leadingEdge + "(?:" + body + ")" + trailingEdge
    }

    /// Parses the last path component of `path` with its extension removed.
    public static func parse(path: String) -> ParsedFilename {
        let base = (((path as NSString).lastPathComponent) as NSString).deletingPathExtension
        return parse(filename: base)
    }

    /// Parses an extension-less file name into title, year, season/episode,
    /// quality, codec and release source. Fields that cannot be found are nil;
    /// the title falls back to the raw file name when nothing better exists.
    public static func parse(filename: String) -> ParsedFilename {
        let s = filename
        let se = LibraryScanner.parseSxxEyy(s)
        // Episode releases give S+E; season packs give only "S01" / "Season 6".
        let season = se?.0
            ?? captureInt(s, leadingEdge + "S(\\d{1,2})(?![0-9])")
            ?? captureInt(s, token("Season\\s*(\\d+)"))
        let episode = se?.1
        let year = firstInt(s, token("(19|20)\\d{2}"))
        let quality = firstGroup(s, token("(2160p|1080p|720p|480p)"))
        let codec = firstGroup(s, token("(x ?265|x ?264|h\\.?265|h\\.?264|hevc|xvid|divx|av1)"))
        let releaseSource = firstGroup(s, token("(blu-?ray|web-?dl|web-?rip|hdtv|dvdrip|brrip|bdrip|remux|hdrip)"))

        // `extractTitle` yields "" when no usable text precedes the first
        // marker. Give the refiner its chance on empty or one-character titles
        // *before* falling back to the raw file name; falling back first would
        // make the title look long enough and the refiner would never run.
        var title = extractTitle(s)
        if title.count < 2, let refined = refiner?.refineTitle(from: s), !refined.isEmpty {
            title = refined
        }
        if title.isEmpty {
            title = s
        }

        return ParsedFilename(title: title, year: year, season: season, episode: episode,
                              quality: quality.map(normalizeQuality), codec: codec,
                              releaseSource: releaseSource)
    }

    /// Title = the text before the earliest "noise" marker (SxxEyy, "Sxx",
    /// "Season N", year, quality), with `.`/`_`/`-` runs and repeated
    /// whitespace collapsed to single spaces and the ends trimmed. The year is
    /// a marker, so it never ends up in the title.
    ///
    /// Returns an empty string when nothing usable precedes the first marker;
    /// the caller decides on the fallback.
    private static func extractTitle(_ s: String) -> String {
        let markers = [
            "S\\d{1,2}E\\d{1,3}",
            token("S\\d{1,2}"),
            token("Season\\s*\\d+"),
            token("(19|20)\\d{2}"),
            token("(2160p|1080p|720p|480p)"),
        ]
        // Earliest marker start wins; with no marker the whole name is the head.
        var cut = s.endIndex
        for marker in markers {
            if let hit = s.range(of: marker, options: [.regularExpression, .caseInsensitive]) {
                cut = min(cut, hit.lowerBound)
            }
        }
        return String(s[..<cut])
            .replacingOccurrences(of: "[._-]+", with: " ", options: .regularExpression)
            .replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
            .trimmingCharacters(in: .whitespaces)
    }

    /// Canonical (lowercased) spelling of a quality tag such as "1080P".
    private static func normalizeQuality(_ q: String) -> String { q.lowercased() }

    /// Text of the first case-insensitive match of `pattern` in `s` (the whole
    /// match, not a capture group), or nil when there is no match.
    private static func firstGroup(_ s: String, _ pattern: String) -> String? {
        guard let r = s.range(of: pattern, options: [.regularExpression, .caseInsensitive]) else { return nil }
        return String(s[r])
    }

    /// First match of `pattern` converted to Int; nil when there is no match
    /// or the matched text is not a plain integer.
    private static func firstInt(_ s: String, _ pattern: String) -> Int? {
        firstGroup(s, pattern).flatMap { Int($0) }
    }

    /// Capture group 1 of the first match, as Int (e.g. "S01" → 1). Returns nil
    /// when the pattern fails to compile, does not match, or group 1 is not an
    /// integer.
    private static func captureInt(_ s: String, _ pattern: String) -> Int? {
        guard let re = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else { return nil }
        let range = NSRange(s.startIndex..., in: s)
        guard let m = re.firstMatch(in: s, range: range),
              let r = Range(m.range(at: 1), in: s) else { return nil }
        return Int(s[r])
    }
}
/// Seam for a model-backed title cleaner (MLX). Implementations refine a title
/// from a messy filename when the regex path produced nothing useful.
///
/// `FilenameParser` only calls the refiner installed in
/// `FilenameParser.refiner` when its regex-derived title is shorter than two
/// characters, and it ignores a `nil` or empty result. Conforming types must
/// be `Sendable`.
public protocol TitleRefiner: Sendable {
/// Returns a cleaned-up title for `filename`, or `nil` when no better title
/// can be produced. `filename` is the name as handed to
/// `FilenameParser.parse(filename:)`; via `parse(path:)` that is the last
/// path component with its extension already removed.
func refineTitle(from filename: String) -> String?
}