94 lines
4.3 KiB
Swift
94 lines
4.3 KiB
Swift
import Foundation
|
|
|
|
/// Regex-first filename → structured fields. This is the deterministic core of
/// the metadata pipeline (handles the overwhelming majority of releases); an
/// MLX `TitleRefiner` is consulted only for the messy tail (see `refiner`).
public enum FilenameParser {

    /// Optional model-backed refiner for titles regex can't cleanly extract.
    /// nil by default — the regex path stands alone. MLX plugs in here.
    public static var refiner: (any TitleRefiner)?

    /// Parses the last component of `path` after dropping its path extension.
    ///
    /// - Parameter path: A file path; only its final component is inspected.
    /// - Returns: The fields recognised in that component.
    public static func parse(path: String) -> ParsedFilename {
        let base = (((path as NSString).lastPathComponent) as NSString).deletingPathExtension
        return parse(filename: base)
    }

    /// Parses a bare release name into title, year, season/episode, quality,
    /// codec and release source.
    ///
    /// No path or extension stripping happens here; use `parse(path:)` for that.
    ///
    /// - Parameter filename: The release name to parse.
    /// - Returns: The recognised fields. `title` is never empty: it falls back
    ///   to `filename` itself when nothing better is available.
    public static func parse(filename: String) -> ParsedFilename {
        // ICU treats `_` as a word character, so the `\b` anchors used by every
        // pattern below never fire next to it. Match against a copy in which
        // `_` is a space so underscore-separated names ("Show_2019_1080p")
        // parse like dot-separated ones. The raw name is still what is handed
        // to the scanner, to the refiner and to the raw-name fallback.
        let s = normalizeSeparators(filename)

        let se = LibraryScanner.parseSxxEyy(filename)
        // Episode releases give S+E; season packs give only "S01" / "Season 6".
        let season = se?.0
            ?? captureInt(s, "\\bS(\\d{1,2})(?![0-9])")
            ?? captureInt(s, "\\bSeason\\s*(\\d+)\\b")
        let episode = se?.1
        // NOTE(review): the first year-like token wins, so a title that itself
        // contains a year-like number ahead of the release year reports that
        // number as the year.
        let year = firstInt(s, "\\b(19|20)\\d{2}\\b")
        let quality = firstGroup(s, "\\b(2160p|1080p|720p|480p)\\b")
        let codec = firstGroup(s, "\\b(x ?265|x ?264|h\\.?265|h\\.?264|hevc|xvid|divx|av1)\\b")
        let releaseSource = firstGroup(s, "\\b(blu-?ray|web-?dl|web-?rip|hdtv|dvdrip|brrip|bdrip|remux|hdrip)\\b")

        // Refiner consult happens on the DEGENERATE head (<2 chars), before the
        // raw-name fallback — otherwise noise-first names ("2160p.x265.GROUP")
        // fall back to the full messy string and the model is never reachable.
        var title = extractTitle(s)
        if title.count < 2, let r = refiner, let refined = r.refineTitle(from: filename), !refined.isEmpty {
            title = refined
        }
        if title.isEmpty { title = filename }

        return ParsedFilename(title: title, year: year, season: season, episode: episode,
                              quality: quality.map(normalizeQuality), codec: codec,
                              releaseSource: releaseSource)
    }

    /// Title = the text before the earliest "noise" marker (SxxEyy, year,
    /// quality, "Season N"), with separators tidied. Mirrors the library
    /// normalization but keeps the year out of the title.
    ///
    /// - Parameter s: The release name, with underscores already normalised
    ///   (see `normalizeSeparators`).
    /// - Returns: The tidied head, possibly empty when a marker sits at the
    ///   very start of `s`.
    private static func extractTitle(_ s: String) -> String {
        let markers = [
            "S\\d{1,2}E\\d{1,3}",
            "\\bS\\d{1,2}\\b",
            "\\bSeason\\s*\\d+\\b",
            "\\b(19|20)\\d{2}\\b",
            "\\b(2160p|1080p|720p|480p)\\b",
        ]

        // Earliest marker start across all patterns; endIndex means "no marker".
        var cut = s.endIndex
        for m in markers {
            if let r = s.range(of: m, options: [.regularExpression, .caseInsensitive]),
               r.lowerBound < cut {
                cut = r.lowerBound
            }
        }

        let head = String(s[..<cut])
        var title = head.replacingOccurrences(of: "[._-]+", with: " ", options: .regularExpression)
        title = title.replacingOccurrences(of: "\\s+", with: " ", options: .regularExpression)
        title = title.trimmingCharacters(in: .whitespaces)
        // May be empty — parse() handles the refiner consult + raw-name fallback.
        return title
    }

    /// Returns a copy of `s` in which every `_` is replaced by a space, so that
    /// `\b`-anchored patterns see underscores as separators.
    private static func normalizeSeparators(_ s: String) -> String {
        s.replacingOccurrences(of: "_", with: " ")
    }

    /// Canonical (lower-cased) spelling of a quality tag such as "1080P".
    private static func normalizeQuality(_ q: String) -> String { q.lowercased() }

    /// Text of the first case-insensitive match of `pattern` in `s`, or nil.
    /// Despite the name, this is the whole match, not capture group 1.
    private static func firstGroup(_ s: String, _ pattern: String) -> String? {
        guard let r = s.range(of: pattern, options: [.regularExpression, .caseInsensitive]) else { return nil }
        return String(s[r])
    }

    /// First match of `pattern` in `s` converted to Int; nil when there is no
    /// match or the matched text is not a plain integer.
    private static func firstInt(_ s: String, _ pattern: String) -> Int? {
        firstGroup(s, pattern).flatMap { Int($0) }
    }

    /// Capture group 1 of the first match, as Int (e.g. "S01" → 1).
    /// Returns nil when the pattern is invalid, does not match, or group 1 is
    /// not an integer.
    private static func captureInt(_ s: String, _ pattern: String) -> Int? {
        guard let re = try? NSRegularExpression(pattern: pattern, options: [.caseInsensitive]) else { return nil }
        let range = NSRange(s.startIndex..., in: s)
        guard let m = re.firstMatch(in: s, range: range),
              let r = Range(m.range(at: 1), in: s) else { return nil }
        return Int(s[r])
    }
}
|
|
|
|
/// Extension point for a model-backed (MLX) title cleaner.
///
/// `FilenameParser.parse(filename:)` consults the installed refiner only when
/// its regex pass yields a title shorter than two characters.
public protocol TitleRefiner: Sendable {
    /// Derives a cleaner title from a messy release name.
    ///
    /// - Parameter filename: The release name the regex pass could not handle.
    /// - Returns: The refined title, or nil when no title can be offered. The
    ///   parser treats an empty string the same as nil.
    func refineTitle(from filename: String) -> String?
}
|