import Foundation

/// Regex-first filename → structured fields. This is the deterministic core of
/// the metadata pipeline (handles the overwhelming majority of releases); an
/// MLX `TitleRefiner` is consulted only for the messy tail (see `refiner`).
public enum FilenameParser {
    /// Optional model-backed refiner for titles regex can't cleanly extract.
    /// nil by default — the regex path stands alone. MLX plugs in here.
    ///
    /// NOTE(review): this is unsynchronized mutable static state; presumably it
    /// is assigned once at startup before any parsing happens — confirm.
    public static var refiner: (any TitleRefiner)?

    // MARK: - Patterns

    // Token edges. `\b` is deliberately NOT used: the regex engine counts `_`
    // as a word character, so `\b` never fires next to an underscore and
    // underscore-delimited names ("Movie_Name_2019_1080p") would yield no
    // year/quality/codec at all. These lookarounds only require that the
    // neighbouring character is not a letter or digit, so `.`, `-`, `_`,
    // spaces, brackets and the string edges all delimit a token.
    private static let tokenStart = "(?<![\\p{L}\\p{N}])"
    private static let tokenEnd = "(?![\\p{L}\\p{N}])"

    private static let yearPattern = tokenStart + "(?:19|20)\\d{2}" + tokenEnd
    private static let qualityPattern = tokenStart + "(?:2160p|1080p|720p|480p)" + tokenEnd
    private static let codecPattern =
        tokenStart + "(?:x ?265|x ?264|h\\.?265|h\\.?264|hevc|xvid|divx|av1)" + tokenEnd
    private static let releaseSourcePattern =
        tokenStart + "(?:blu-?ray|web-?dl|web-?rip|hdtv|dvdrip|brrip|bdrip|remux|hdrip)" + tokenEnd

    /// "S01"-style season tag; group 1 is the number. Only a following digit
    /// blocks the match, so a tag that runs into other text is still accepted.
    private static let seasonTagCapture = tokenStart + "S(\\d{1,2})(?![0-9])"
    /// "Season 6"-style season tag; group 1 is the number.
    private static let seasonWordCapture = tokenStart + "Season\\s*(\\d+)" + tokenEnd

    /// Markers that end the title: SxxEyy, "Sxx", "Season N", year, quality.
    private static let titleStopPatterns = [
        "S\\d{1,2}E\\d{1,3}",
        tokenStart + "S\\d{1,2}" + tokenEnd,
        tokenStart + "Season\\s*\\d+" + tokenEnd,
        yearPattern,
        qualityPattern,
    ]

    // MARK: - Entry points

    /// Parses the last path component of `path`, with its path extension removed.
    ///
    /// - Parameter path: A file path; only the final component is inspected.
    /// - Returns: The fields extracted by `parse(filename:)`.
    public static func parse(path: String) -> ParsedFilename {
        let lastComponent = (path as NSString).lastPathComponent
        let base = (lastComponent as NSString).deletingPathExtension
        return parse(filename: base)
    }

    /// Extracts title, year, season/episode, quality, codec and release source
    /// from a bare filename.
    ///
    /// - Parameter filename: The name to parse. It is used as-is; no path or
    ///   extension stripping happens here.
    /// - Returns: A `ParsedFilename` whose title falls back to the raw
    ///   `filename` when neither the regex path nor the refiner produced one.
    public static func parse(filename: String) -> ParsedFilename {
        let s = filename
        let se = LibraryScanner.parseSxxEyy(s)
        // Episode releases give S+E; season packs give only "S01" / "Season 6".
        let season = se?.0
            ?? captureInt(s, seasonTagCapture)
            ?? captureInt(s, seasonWordCapture)
        let episode = se?.1
        let year = firstInt(s, yearPattern)
        let quality = firstGroup(s, qualityPattern)
        let codec = firstGroup(s, codecPattern)
        let releaseSource = firstGroup(s, releaseSourcePattern)

        // Refiner consult happens on the DEGENERATE head (<2 chars), before the
        // raw-name fallback — otherwise noise-first names ("2160p.x265.GROUP")
        // fall back to the full messy string and the model is never reachable.
        var title = extractTitle(s)
        if title.count < 2,
           let r = refiner,
           let refined = r.refineTitle(from: s),
           !refined.isEmpty {
            title = refined
        }
        if title.isEmpty {
            title = s
        }

        return ParsedFilename(title: title,
                              year: year,
                              season: season,
                              episode: episode,
                              quality: quality.map(normalizeQuality),
                              codec: codec,
                              releaseSource: releaseSource)
    }

    // MARK: - Helpers

    /// Title = the text before the earliest "noise" marker (SxxEyy, year,
    /// quality, "Season N"), with separators tidied. Mirrors the library
    /// normalization but keeps the year out of the title.
    ///
    /// - Returns: The tidied title, possibly empty when a marker sits at the
    ///   very start of `s`.
    private static func extractTitle(_ s: String) -> String {
        // Earliest marker start wins; with no marker the whole string is the head.
        let cut = titleStopPatterns
            .compactMap { s.range(of: $0, options: [.regularExpression, .caseInsensitive])?.lowerBound }
            .min() ?? s.endIndex
        let head = String(s[..<cut])

        var title = head.replacingOccurrences(of: "[._-]+", with: " ", options: .regularExpression)
        title = title.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        title = title.trimmingCharacters(in: .whitespaces)
        // May be empty — parse() handles the refiner consult + raw-name fallback.
        return title
    }

    /// Lowercases a matched quality token (the match itself is case-insensitive).
    private static func normalizeQuality(_ q: String) -> String {
        q.lowercased()
    }

    /// Returns the text of the first case-insensitive match of `pattern` in `s`.
    ///
    /// Despite the name this is the WHOLE match, not a capture group; the text
    /// is returned exactly as it appears in `s` (original casing preserved).
    private static func firstGroup(_ s: String, _ pattern: String) -> String? {
        guard let r = s.range(of: pattern, options: [.regularExpression, .caseInsensitive]) else {
            return nil
        }
        return String(s[r])
    }

    /// First whole match of `pattern`, converted to `Int` (nil if there is no
    /// match or the matched text is not an integer).
    private static func firstInt(_ s: String, _ pattern: String) -> Int? {
        firstGroup(s, pattern).flatMap { Int($0) }
    }

    /// Capture group 1 of the first match, as Int (e.g. "S01" → 1).
    ///
    /// Returns nil when the pattern fails to compile, nothing matches, or
    /// group 1 did not participate in the match.
    private static func captureInt(_ s: String, _ pattern: String) -> Int? {
        guard let re = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else {
            return nil
        }
        let range = NSRange(s.startIndex..., in: s)
        guard let m = re.firstMatch(in: s, range: range),
              let r = Range(m.range(at: 1), in: s) else {
            return nil
        }
        return Int(s[r])
    }
}

/// Seam for a model-backed title cleaner (MLX). Implementations refine a title
/// from a messy filename when the regex path produced nothing useful.
public protocol TitleRefiner: Sendable {
    func refineTitle(from filename: String) -> String?
}