tv-anarchy/Sources/TVAnarchyCore/Metadata/FilenameParser.swift
Natalie 7ff780fe56 feat(apps/tv-anarchy): add restart command support
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-09 21:17:10 -07:00

94 lines
4.3 KiB
Swift

import Foundation
/// Regex-first conversion of a release filename into structured fields. This is
/// the deterministic core of the metadata pipeline (handles the overwhelming
/// majority of releases); an MLX `TitleRefiner` is consulted only for the messy
/// tail (see `refiner`).
public enum FilenameParser {
    /// Optional model-backed refiner for titles regex can't cleanly extract.
    /// `nil` by default, so the regex path stands alone. MLX plugs in here.
    // NOTE(review): mutable static with no synchronization visible in this file;
    // confirm it is only assigned before parsing starts.
    public static var refiner: (any TitleRefiner)?

    /// Parses the last component of `path` after removing its path extension.
    ///
    /// - Parameter path: A file path; only its final component is inspected.
    /// - Returns: The fields extracted by `parse(filename:)` from that component.
    public static func parse(path: String) -> ParsedFilename {
        let base = (((path as NSString).lastPathComponent) as NSString).deletingPathExtension
        return parse(filename: base)
    }

    /// Extracts title, year, season, episode, quality, codec and release source
    /// from a filename that has already had its extension removed.
    ///
    /// - Parameter filename: The bare release name to inspect.
    /// - Returns: A `ParsedFilename`. `quality` is lowercased; fields with no
    ///   match are `nil`. `title` is never empty: it falls back to `filename`
    ///   itself when neither the regex path nor the refiner yields anything.
    public static func parse(filename: String) -> ParsedFilename {
        let s = filename
        let se = LibraryScanner.parseSxxEyy(s)
        // Episode releases give S+E; season packs give only "S01" / "Season 6".
        let season = se?.0
            ?? captureInt(s, "\\bS(\\d{1,2})(?![0-9])")
            ?? captureInt(s, "\\bSeason\\s*(\\d+)\\b")
        let episode = se?.1
        let year = firstInt(s, "\\b(19|20)\\d{2}\\b")
        let quality = firstGroup(s, "\\b(2160p|1080p|720p|480p)\\b")
        let codec = firstGroup(s, "\\b(x ?265|x ?264|h\\.?265|h\\.?264|hevc|xvid|divx|av1)\\b")
        let releaseSource = firstGroup(s, "\\b(blu-?ray|web-?dl|web-?rip|hdtv|dvdrip|brrip|bdrip|remux|hdrip)\\b")

        // Refiner consult happens on the DEGENERATE head (<2 chars), before the
        // raw-name fallback. Otherwise noise-first names ("2160p.x265.GROUP")
        // fall back to the full messy string and the model is never reachable.
        var title = extractTitle(s)
        if title.count < 2, let r = refiner, let refined = r.refineTitle(from: s), !refined.isEmpty {
            title = refined
        }
        if title.isEmpty { title = s }

        return ParsedFilename(title: title, year: year, season: season, episode: episode,
                              quality: quality.map(normalizeQuality), codec: codec,
                              releaseSource: releaseSource)
    }

    /// Title = the text before the earliest "noise" marker (SxxEyy, "Sxx",
    /// "Season N", year, quality), with separators tidied. The year is kept out
    /// of the title.
    ///
    /// - Returns: The cleaned head of `s`. May be empty; `parse(filename:)`
    ///   handles the refiner consult and the raw-name fallback.
    private static func extractTitle(_ s: String) -> String {
        let markers = [
            "S\\d{1,2}E\\d{1,3}",
            "\\bS\\d{1,2}\\b",
            "\\bSeason\\s*\\d+\\b",
            "\\b(19|20)\\d{2}\\b",
            "\\b(2160p|1080p|720p|480p)\\b",
        ]
        // Character offset of the earliest marker; the whole string if none match.
        var cut = s.count
        for m in markers {
            if let r = s.range(of: m, options: [.regularExpression, .caseInsensitive]) {
                cut = min(cut, s.distance(from: s.startIndex, to: r.lowerBound))
            }
        }
        let head = String(s.prefix(cut))
        var title = head.replacingOccurrences(of: "[._-]+", with: " ", options: .regularExpression)
        title = title.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        title = title.trimmingCharacters(in: .whitespaces)
        // A marker wrapped in brackets ("Movie (1999)", "Show [1080p]") leaves its
        // opening bracket at the end of the head. Drop trailing openers (and the
        // space before them) so the title does not end in a dangling "(".
        // Leading brackets are left alone: they can be part of a real title.
        while let last = title.last, last == " " || last == "(" || last == "[" || last == "{" {
            title.removeLast()
        }
        return title
    }

    /// Canonical form of a quality tag: lowercased (e.g. "1080P" becomes "1080p").
    private static func normalizeQuality(_ q: String) -> String { q.lowercased() }

    /// Returns the full text of the first case-insensitive match of `pattern`
    /// in `s` (the entire match, not a capture group), or `nil` if none.
    private static func firstGroup(_ s: String, _ pattern: String) -> String? {
        guard let r = s.range(of: pattern, options: [.regularExpression, .caseInsensitive]) else { return nil }
        return String(s[r])
    }

    /// The first match of `pattern` converted to `Int`; `nil` when there is no
    /// match or the matched text is not an integer.
    private static func firstInt(_ s: String, _ pattern: String) -> Int? {
        firstGroup(s, pattern).flatMap { Int($0) }
    }

    /// Capture group 1 of the first match, as Int (e.g. "S01" gives 1).
    /// Returns `nil` if the pattern fails to compile, nothing matches, group 1
    /// did not participate in the match, or its text is not an integer.
    private static func captureInt(_ s: String, _ pattern: String) -> Int? {
        guard let re = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else { return nil }
        let range = NSRange(s.startIndex..., in: s)
        guard let m = re.firstMatch(in: s, range: range),
              let r = Range(m.range(at: 1), in: s) else { return nil }
        return Int(s[r])
    }
}
/// Seam for a model-backed title cleaner (MLX). Implementations refine a title
/// from a messy filename when the regex path produced nothing useful.
/// `FilenameParser.parse(filename:)` consults the refiner only when its own
/// extracted title is shorter than two characters.
public protocol TitleRefiner: Sendable {
/// Produces a cleaned-up title for `filename`.
///
/// - Parameter filename: The same string given to `FilenameParser.parse(filename:)`.
/// - Returns: The refined title. `nil` or an empty string is ignored by the
///   parser, which then keeps its own result or falls back to the raw name.
func refineTitle(from filename: String) -> String?
}