"""Parse media file paths into title, year, quality and episode metadata."""

from __future__ import annotations

import re
from pathlib import Path

QUALITY_RE = re.compile(
    r"\b(2160p|1080p|720p|480p|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip)\b",
    re.I,
)
YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
EPISODE_RE = re.compile(
    r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})(?:[ ._-]*[Ee](\d{1,3}))?"
)
ALT_EPISODE_RE = re.compile(
    r"\b(\d{1,2})x(\d{1,3})(?:[ ._-]*(\d{1,2})x(\d{1,3}))?\b"
)
# The lookbehind replaces a plain ``\b`` so that a possessive "'s" followed by
# a number ("Ocean's 11") is not read as a season marker; re.I also accepts
# upper-case "SEASON 2".
SEASON_RE = re.compile(r"(?<![\w'\u2019])s(?:eason)?[ ._-]*(\d{1,2})\b", re.I)
BRACKET_RE = re.compile(r"[\[(][^\])]*(?:\]|\))")
AUDIO_RE = re.compile(
    r"\b(?:aac|aac\d(?:[ ._-]?\d)?|ac3|eac3|ddp(?:\d(?:[ ._-]?\d)?)?|dts|truehd|atmos"
    r"|flac|mp3|opus|5[ ._-]?1|7[ ._-]?1|2[ ._-]?0|6ch|2ch)\b",
    re.I,
)
CODEC_RE = re.compile(
    r"\b(?:x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|avc|av1|10bit|8bit|hdr|hdr10|dv"
    r"|dolby[ ._-]?vision)\b",
    re.I,
)
EDITION_RE = re.compile(
    r"\b(?:proper|repack|rerip|extended|unrated|directors?[ ._-]?cut|theatrical|imax"
    r"|multi|line|dubbed|subbed)\b",
    re.I,
)
RELEASE_GROUP_RE = re.compile(
    r"(?:^|[ ._-])(?:YTS|TGx|EZTVx?|MeGusta|PSA|RARBG|NTb|AMZN|DSNP|PMNTP|FLUX"
    r"|SuccessfulCrab|GalaxyTV)\b",
    re.I,
)
TRAILING_GROUP_RE = re.compile(
    r"(?:[ ._-]+-[ ._-]*[A-Za-z0-9][A-Za-z0-9._-]{1,24})$"
)

# Folder names that hold subtitles; the media folder is one level above them.
_SUBTITLE_DIRS = frozenset({"subs", "subtitles", "sub"})


def spaced(raw: str) -> str:
    """Return *raw* with dots/underscores turned into single spaces.

    ``&`` becomes ``and``, whitespace runs collapse to one space, and leading
    or trailing spaces, hyphens, dots and underscores are stripped.
    """
    text = raw.replace("&", " and ")
    text = re.sub(r"[\._]+", " ", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip(" -._")


def strip_brackets(raw: str) -> str:
    """Replace every ``[...]`` / ``(...)`` group in *raw* with a space."""
    return BRACKET_RE.sub(" ", raw)


def _normalise(raw: str, *, drop_trailing_group: bool) -> str:
    """Strip brackets and known release-group names, then space the text.

    When *drop_trailing_group* is true, a trailing ``" -GROUP"``-style suffix
    (``TRAILING_GROUP_RE``) is removed first.
    """
    text = strip_brackets(raw)
    if drop_trailing_group:
        text = TRAILING_GROUP_RE.sub("", text)
    text = RELEASE_GROUP_RE.sub(" ", text)
    return spaced(text)


def strip_release_tail(raw: str) -> str:
    """Remove brackets, a trailing group suffix and known release-group names."""
    return _normalise(raw, drop_trailing_group=True)


def first_noise_index(text: str) -> int | None:
    """Return the offset of the earliest technical tag in *text*.

    Quality, audio, codec, edition and release-group patterns are each
    searched once; the smallest match start is returned, or ``None`` when no
    pattern matches.
    """
    starts = []
    for pattern in (QUALITY_RE, AUDIO_RE, CODEC_RE, EDITION_RE, RELEASE_GROUP_RE):
        found = pattern.search(text)
        if found:
            starts.append(found.start())
    return min(starts) if starts else None


def _cut_at_noise(text: str) -> str:
    """Drop everything from the first technical tag onwards and space the rest."""
    idx = first_noise_index(text)
    if idx is not None:
        text = text[:idx]
    return spaced(text)


def trim_noise(raw: str) -> str:
    """Return the human-readable part of *raw* that precedes any release tags."""
    return _cut_at_noise(strip_release_tail(raw))


def _release_year(text: str, limit: int | None = None) -> int | None:
    """Return the most plausible release year found in *text*.

    The last year-like token before *limit* wins, so a number that is part of
    the title ("Blade Runner 2049 2017") is not mistaken for the release year.
    *limit* defaults to the offset of the first technical tag. When no year
    precedes the limit, the first year anywhere in *text* is used; ``None`` is
    returned when *text* holds no year at all.
    """
    if limit is None:
        limit = first_noise_index(text)
    head = text if limit is None else text[:limit]
    years = YEAR_RE.findall(head)
    if years:
        return int(years[-1])
    fallback = YEAR_RE.search(text)
    return int(fallback.group(1)) if fallback else None


def _strip_title(raw: str) -> str:
    """Return the cleaned title for *raw*, or ``""`` when nothing is left."""
    text = trim_noise(raw)
    release = _release_year(raw)
    if release is not None:
        # Remove only the release year; other year-like numbers belong to the
        # title itself ("2001 A Space Odyssey 1968" -> "2001 A Space Odyssey").
        text = re.sub(rf"\b{release}\b", " ", text)
    text = EPISODE_RE.sub(" ", text)
    text = ALT_EPISODE_RE.sub(" ", text)
    text = SEASON_RE.sub(" ", text)
    return spaced(text)


def clean_title(raw: str) -> str:
    """Return a display title for *raw*, or ``"Unknown"`` if none remains.

    Release tags, the release year and season/episode markers are removed.
    """
    return _strip_title(raw) or "Unknown"


def clean_episode_title(raw: str) -> str:
    """Return the episode title found in *raw*, or ``"Episode"`` if none remains.

    *raw* is the part of a file name that follows the episode marker.
    """
    text = trim_noise(raw)
    if not text:
        # In "Show - S01E02 - Pilot" the whole remainder (" - Pilot") matches
        # TRAILING_GROUP_RE and would be discarded as a release group; retry
        # without that step so the title survives.
        text = _cut_at_noise(_normalise(raw, drop_trailing_group=False))
    text = YEAR_RE.sub(" ", text)
    return spaced(text) or "Episode"


def parent_candidate(path: Path) -> str:
    """Return the name of the folder containing *path*.

    A subtitles folder ("subs", "subtitles", "sub") is skipped in favour of
    its own parent. An empty string is returned when there is no usable name.
    """
    parent = path.parent
    if parent.name.lower() in _SUBTITLE_DIRS:
        parent = parent.parent
    name = parent.name
    if not name or name in {".", "/"}:
        return ""
    return name


def movie_title_source(path: Path, stem: str) -> str:
    """Choose between the folder name and *stem* as the movie title source.

    A name that carries a year is preferred (folder first); otherwise a folder
    name free of technical tags and episode markers is used, else *stem*.
    """
    parent = parent_candidate(path)
    if YEAR_RE.search(parent):
        return parent
    if YEAR_RE.search(stem):
        return stem
    if parent and first_noise_index(parent) is None and not EPISODE_RE.search(parent):
        return parent
    return stem


def _folder_title(path: Path) -> str:
    """Return a cleaned title from the containing folder or the one above it.

    The second level covers layouts such as ``Show/Season 1/S01E01.mkv`` where
    the nearest folder name reduces to nothing once the season marker is
    removed. Returns ``""`` when neither folder yields a title.
    """
    folder = path.parent
    if folder.name.lower() in _SUBTITLE_DIRS:
        folder = folder.parent
    for name in (folder.name, folder.parent.name):
        title = _strip_title(name)
        if title:
            return title
    return ""


def _series_title(path: Path, prefix: str, last_resort: str = "") -> str:
    """Return the series title from *prefix*, then folders, then *last_resort*."""
    return (
        _strip_title(prefix)
        or _folder_title(path)
        or _strip_title(last_resort)
        or "Unknown"
    )


def parse_media(path: str) -> dict:
    """Parse a media file path into a metadata dictionary.

    Returns a dict with the keys ``source``, ``title``, ``year``, ``quality``,
    ``type`` ("movie", "episode" or "season"), ``season``, ``episode``,
    ``multi_episode``, ``episode_title`` and ``extension`` (lower-cased).
    ``year``, ``season`` and ``episode`` are ``None`` when not detected;
    ``quality`` is either ``""`` or ``" - <tag>"``.
    """
    p = Path(path)
    stem = p.stem
    parent = parent_candidate(p)

    quality_match = QUALITY_RE.search(stem) or QUALITY_RE.search(parent)
    episode_match = EPISODE_RE.search(stem)
    alt_match = ALT_EPISODE_RE.search(stem)
    season_match = SEASON_RE.search(stem)
    marker = episode_match or alt_match or season_match

    # For series the year precedes the season/episode marker; for movies it
    # precedes the first technical tag. The folder is consulted only when the
    # file name carries no year at all.
    year = _release_year(stem, marker.start() if marker else None)
    if year is None:
        year = _release_year(parent)

    media_type = "movie"
    season = None
    episode = None
    multi_episode = ""
    episode_title = ""

    if episode_match or alt_match:
        media_type = "episode"
        # "S01E02E03" keeps the second episode in group 3, "1x02 1x03" in group 4.
        second = episode_match.group(3) if episode_match else alt_match.group(4)
        season = int(marker.group(1))
        episode = int(marker.group(2))
        if second:
            multi_episode = f"-E{int(second):02d}"
        title = _series_title(p, stem[:marker.start()])
        episode_title = clean_episode_title(stem[marker.end():])
    elif season_match:
        media_type = "season"
        season = int(season_match.group(1))
        title = _series_title(p, stem[:season_match.start()], stem)
    else:
        title = clean_title(movie_title_source(p, stem))

    quality = ""
    if quality_match:
        quality = f" - {quality_match.group(1).replace('.', ' ')}"

    return {
        "source": str(p),
        "title": title,
        "year": year,
        "quality": quality,
        "type": media_type,
        "season": season,
        "episode": episode,
        "multi_episode": multi_episode,
        "episode_title": episode_title if media_type == "episode" else "",
        "extension": p.suffix.lower(),
    }