142 lines
4.9 KiB
Python
142 lines
4.9 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Token boundaries used instead of ``\b``.  ``\b`` counts "_" as a word
# character, so it never fires between "_" and a letter or digit, and names
# such as "Movie_2019_1080p" would yield no year or quality.  These lookarounds
# behave like ``\b`` for the patterns below (which all begin and end with a
# letter or digit) except that "_" is also accepted as a separator, matching
# the ``[ ._-]`` separator sets used throughout this module.
_TOKEN_START = r"(?<![^\W_])"
_TOKEN_END = r"(?![^\W_])"

# Resolution / source tag; group 1 is the matched tag.
QUALITY_RE = re.compile(
    _TOKEN_START
    + r"(2160p|1080p|720p|480p|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip)"
    + _TOKEN_END,
    re.I,
)

# Four-digit year 1900-2099; group 1 is the year.
YEAR_RE = re.compile(_TOKEN_START + r"(19\d{2}|20\d{2})" + _TOKEN_END)

# "S01E02" with an optional second episode ("S01E02E03" / "S01E02-E03").
# Groups: 1 = season, 2 = episode, 3 = optional last episode.
EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})(?:[ ._-]*[Ee](\d{1,3}))?")

# "1x02" with an optional second "1x03".
# Groups: 1 = season, 2 = episode, 3 = optional second season, 4 = optional last episode.
ALT_EPISODE_RE = re.compile(
    _TOKEN_START
    + r"(\d{1,2})x(\d{1,3})(?:[ ._-]*(\d{1,2})x(\d{1,3}))?"
    + _TOKEN_END
)

# Bare season marker ("S01", "Season 1"); group 1 is the season number.
SEASON_RE = re.compile(_TOKEN_START + r"[Ss](?:eason)?[ ._-]*(\d{1,2})" + _TOKEN_END)

# A "[...]" or "(...)" span without nested closing brackets.
BRACKET_RE = re.compile(r"[\[(][^\])]*(?:\]|\))")

# Audio codec / channel-layout tags.
AUDIO_RE = re.compile(
    _TOKEN_START
    + r"(?:aac|aac\d(?:[ ._-]?\d)?|ac3|eac3|ddp(?:\d(?:[ ._-]?\d)?)?|dts|truehd|atmos|flac|mp3|opus|5[ ._-]?1|7[ ._-]?1|2[ ._-]?0|6ch|2ch)"
    + _TOKEN_END,
    re.I,
)

# Video codec / bit-depth / HDR tags.
CODEC_RE = re.compile(
    _TOKEN_START
    + r"(?:x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|avc|av1|10bit|8bit|hdr|hdr10|dv|dolby[ ._-]?vision)"
    + _TOKEN_END,
    re.I,
)

# Edition / release-flavour tags.
EDITION_RE = re.compile(
    _TOKEN_START
    + r"(?:proper|repack|rerip|extended|unrated|directors?[ ._-]?cut|theatrical|imax|multi|line|dubbed|subbed)"
    + _TOKEN_END,
    re.I,
)

# Known release-group / source names, preceded by start-of-string or a separator.
RELEASE_GROUP_RE = re.compile(
    r"(?:^|[ ._-])(?:YTS|TGx|EZTVx?|MeGusta|PSA|RARBG|NTb|AMZN|DSNP|PMNTP|FLUX|SuccessfulCrab|GalaxyTV)"
    + _TOKEN_END,
    re.I,
)

# Trailing "<separator>-<name>" suffix (name is 2-25 chars) at the very end of the string.
TRAILING_GROUP_RE = re.compile(r"(?:[ ._-]+-[ ._-]*[A-Za-z0-9][A-Za-z0-9._-]{1,24})$")
|
|
|
|
|
|
def spaced(raw: str) -> str:
    """Turn a dotted/underscored name fragment into space-separated text.

    "&" becomes " and ", runs of "." and "_" become a single space, all
    whitespace runs are collapsed, and leading/trailing spaces, hyphens,
    dots and underscores are stripped.
    """
    expanded = raw.replace("&", " and ")
    collapsed = re.sub(r"\s+", " ", re.sub(r"[\._]+", " ", expanded))
    return collapsed.strip(" -._")
|
|
|
|
|
|
def strip_brackets(raw: str) -> str:
    """Replace every ``[...]`` or ``(...)`` span matched by BRACKET_RE with one space."""
    without_brackets = re.sub(BRACKET_RE, " ", raw)
    return without_brackets
|
|
|
|
|
|
def strip_release_tail(raw: str) -> str:
    """Remove bracketed spans and release-group markers, then normalise spacing.

    Order matters: brackets are blanked first, then the trailing
    "-<group>" suffix is cut, then any known group names are blanked.
    """
    unbracketed = strip_brackets(raw)
    without_suffix = TRAILING_GROUP_RE.sub("", unbracketed)
    return spaced(RELEASE_GROUP_RE.sub(" ", without_suffix))
|
|
|
|
|
|
def first_noise_index(text: str) -> int | None:
    """Return the offset of the earliest quality/audio/codec/edition/group tag.

    Each pattern contributes the start of its first match in ``text``;
    the smallest of those is returned, or ``None`` when nothing matches.
    """
    noise_patterns = (QUALITY_RE, AUDIO_RE, CODEC_RE, EDITION_RE, RELEASE_GROUP_RE)
    hits = (pattern.search(text) for pattern in noise_patterns)
    return min((hit.start() for hit in hits if hit), default=None)
|
|
|
|
|
|
def trim_noise(raw: str) -> str:
    """Strip release markers and cut the text at the first noise tag.

    Everything from the first quality/audio/codec/edition/group tag onwards
    is dropped; the remainder is returned with normalised spacing.
    """
    cleaned = strip_release_tail(raw)
    cut = first_noise_index(cleaned)
    kept = cleaned if cut is None else cleaned[:cut]
    return spaced(kept)
|
|
|
|
|
|
def clean_title(raw: str) -> str:
    """Reduce a raw name fragment to a display title.

    After noise trimming, years and season/episode markers are blanked out.
    Returns "Unknown" when nothing is left.
    """
    title = trim_noise(raw)
    # Applied in this fixed order: year, SxxEyy, NxMM, bare season marker.
    for marker in (YEAR_RE, EPISODE_RE, ALT_EPISODE_RE, SEASON_RE):
        title = marker.sub(" ", title)
    return spaced(title) or "Unknown"
|
|
|
|
|
|
def clean_episode_title(raw: str) -> str:
    """Reduce the text after an episode marker to an episode title.

    Noise is trimmed and years are blanked out. Returns "Episode" when
    nothing is left.
    """
    without_years = YEAR_RE.sub(" ", trim_noise(raw))
    return spaced(without_years) or "Episode"
|
|
|
|
|
|
def parent_candidate(path: Path) -> str:
    """Return the name of the folder that contains ``path``.

    A containing folder named "subs", "subtitles" or "sub" (any case) is
    skipped in favour of its own parent. Returns "" when the resulting
    folder has no usable name.
    """
    folder = path.parent
    if folder.name.lower() in {"subs", "subtitles", "sub"}:
        folder = folder.parent
    label = folder.name
    return "" if label in {"", ".", "/"} else label
|
|
|
|
|
|
def movie_title_source(path: Path, stem: str) -> str:
    """Choose which string a movie title should be derived from.

    Preference order: the parent folder name if it contains a year; the file
    stem if it contains a year; the parent folder name if it is non-empty and
    carries neither noise tags nor an SxxEyy marker; otherwise the stem.
    """
    folder = parent_candidate(path)
    if YEAR_RE.search(folder):
        return folder
    if YEAR_RE.search(stem):
        return stem
    folder_is_clean = (
        bool(folder)
        and first_noise_index(folder) is None
        and EPISODE_RE.search(folder) is None
    )
    return folder if folder_is_clean else stem
|
|
|
|
|
|
def parse_media(path: str) -> dict:
    """Parse a media file path into naming metadata.

    The file stem is checked, in order, for an ``SxxEyy`` marker, an ``NxMM``
    marker and a bare season marker; the first hit decides the media type.
    A stem with none of these is treated as a movie.

    Args:
        path: File path. Only the stem, the suffix and the name of the
            containing folder (see ``parent_candidate``) are inspected.

    Returns:
        A dict with the keys ``source``, ``title``, ``year`` (int or None),
        ``quality`` (" - <tag>" or ""), ``type`` ("movie", "episode" or
        "season"), ``season``, ``episode``, ``multi_episode`` ("-Enn" or ""),
        ``episode_title`` and ``extension`` (lower-cased suffix).
    """
    p = Path(path)
    stem = p.stem
    parent = parent_candidate(p)

    # The file name wins; the folder name is only consulted when the stem
    # carries no quality tag / year of its own.
    quality_match = QUALITY_RE.search(stem) or QUALITY_RE.search(parent)
    year_match = YEAR_RE.search(stem) or YEAR_RE.search(parent)

    media_type = "movie"
    season = None
    episode = None
    multi_episode = ""
    episode_title = ""

    # SxxEyy takes precedence over NxMM; later patterns are only searched
    # when the earlier ones found nothing.
    episode_match = EPISODE_RE.search(stem)
    alt_match = None if episode_match else ALT_EPISODE_RE.search(stem)
    marker = episode_match or alt_match

    if marker:
        media_type = "episode"
        season = int(marker.group(1))
        episode = int(marker.group(2))
        # The optional last episode is group 3 in EPISODE_RE but group 4 in
        # ALT_EPISODE_RE (whose group 3 is the repeated season number).
        last_episode = marker.group(3 if episode_match else 4)
        if last_episode:
            multi_episode = f"-E{int(last_episode):02d}"
        # When the stem starts with the marker there is no title text in the
        # file name; fall back to the folder name like the season branch does.
        title = clean_title(stem[:marker.start()] or parent)
        episode_title = clean_episode_title(stem[marker.end():])
    else:
        season_match = SEASON_RE.search(stem)
        if season_match:
            media_type = "season"
            season = int(season_match.group(1))
            title = clean_title(stem[:season_match.start()] or parent or stem)
        else:
            title = clean_title(movie_title_source(p, stem))

    return {
        "source": str(p),
        "title": title,
        "year": int(year_match.group(1)) if year_match else None,
        "quality": f" - {quality_match.group(1).replace('.', ' ')}" if quality_match else "",
        "type": media_type,
        "season": season,
        "episode": episode,
        "multi_episode": multi_episode,
        # Only the episode branch ever assigns a non-empty episode title.
        "episode_title": episode_title,
        "extension": p.suffix.lower(),
    }
|