from __future__ import annotations import os import re import time from collections import Counter from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from .metadata import movie_metadata, series_metadata from .parser import clean_title, parse_media from .storage import drive_stats LIBRARY_ROOT_NAMES = {"movies", "shows", "tv", "tv shows"} TV_ROOT_NAMES = {"shows", "tv", "tv shows"} EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})") SEASON_FOLDER_RE = re.compile(r"season[ ._-]*(\d{1,2})", re.I) YEAR_RE = re.compile(r"\((19\d{2}|20\d{2})\)") ANY_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b") VERSION_RE = re.compile(r"\b(2160p|1080p|720p|480p|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip|x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|av1|hdr10?|dv|proper|repack|extended|unrated|directors?[ ._-]?cut|theatrical|imax)\b", re.I) EXTRA_FOLDER_NAMES = { "behind the scenes", "deleted scenes", "extras", "featurettes", "interviews", "samples", "scenes", "shorts", "trailers", } def library_roots(root: Path) -> list[Path]: matches = [] try: children = list(root.iterdir()) except OSError: return matches for child in children: if child.is_dir() and child.name.lower() in LIBRARY_ROOT_NAMES: matches.append(child) return matches def library_kind(library_root: Path) -> str: return "tv" if library_root.name.lower() in TV_ROOT_NAMES else "movie" def infer_library_kind(path: str) -> str: parts = {part.lower() for part in Path(path).parts} if parts & TV_ROOT_NAMES: return "tv" if "movies" in parts: return "movie" return "other" def split_library_path(path: str) -> tuple[str, list[str]]: parts = list(Path(path).parts) lowered = [part.lower() for part in parts] for root in LIBRARY_ROOT_NAMES: if root in lowered: idx = lowered.index(root) return parts[idx], parts[idx + 1:] return "", parts def identity_slug(title: str) -> str: return re.sub(r"[^a-z0-9]+", " ", title.lower()).strip() def clean_collection_title(name: str) -> tuple[str, int | None]: year_match = ANY_YEAR_RE.search(name) year = int(year_match.group(1)) if year_match else None title = clean_title(name) return title, year def merge_key(kind: str, title: str, year: int | None = None) -> str: slug = identity_slug(title) if kind == "movie": return f"movie::{slug}::{year or ''}" return f"tv::{slug}" def file_version(item: dict) -> dict: path = Path(item.get("path", "")) text = " ".join(part for part in [path.parent.name, path.stem] if part) tags = [] for match in VERSION_RE.finditer(text): tag = match.group(1).replace(".", " ").replace("_", " ") normalized = re.sub(r"\s+", " ", tag).strip() if normalized.lower() not in {existing.lower() for existing in tags}: tags.append(normalized) return { "path": item.get("path"), "name": item.get("name"), "drive": item.get("drive"), "size": item.get("size") or 0, "quality": next((tag for tag in tags if tag.lower() in {"2160p", "1080p", "720p", "480p"}), ""), "tags": tags[:8], } def is_extra_media(path: Path, library_root: Path, kind: str, app: dict) -> bool: try: relative = path.relative_to(library_root) except ValueError: relative = path parts = [part.lower().replace("_", " ").replace(".", " ") for part in relative.parts[:-1]] if kind == "movie" and any(part in EXTRA_FOLDER_NAMES for part in parts[1:]): return True lowered_name = path.name.lower().replace("_", " ").replace(".", " ") return any(keyword and keyword.lower() in lowered_name for keyword in app.get("extra_keywords", [])) def item_identity(item: dict) -> dict: root, rel = split_library_path(item.get("path", "")) kind = item.get("library") or infer_library_kind(item.get("path", "")) parsed = parse_media(item.get("path", item.get("name", ""))) if kind == "tv" and rel: title = clean_title(rel[0]) season = parsed.get("season") episode = parsed.get("episode") for part in rel: match = SEASON_FOLDER_RE.search(part) if match and not season: season = int(match.group(1)) return { "kind": "tv", "root": root, "title": title, "key": merge_key("tv", title), "season": season, "episode": episode, } title, year = clean_collection_title(rel[0] if rel else parsed["title"]) year = year or parsed.get("year") return { "kind": "movie", "root": root, "title": title, "year": year, "slug": identity_slug(title), "key": merge_key("movie", title, year), } def normalize_library(library: dict) -> dict: items = library.get("items", []) kinds = Counter() for item in items: kind = item.get("library") or infer_library_kind(item.get("path", "")) item["library"] = kind if kind in {"movie", "tv"}: kinds[kind] += 1 library["counts"] = { "movies": kinds.get("movie", 0), "tv": kinds.get("tv", 0), "total": len(items), } if "collections" not in library: library["collections"] = build_collections({}, items) return library def build_collections(config: dict, items: list[dict], enrich: bool = False) -> dict: movies: dict[str, dict] = {} series: dict[str, dict] = {} for item in items: identity = item_identity(item) if identity["kind"] == "tv": show = series.setdefault(identity["key"], { "key": identity["key"], "title": identity["title"], "library": "tv", "files": [], "seasons": {}, "metadata": {"title": identity["title"], "source": "filename", "seasons": {}}, }) show["files"].append(item) season_no = identity.get("season") or 0 episode_no = identity.get("episode") or 0 season = show["seasons"].setdefault(str(season_no), {"season": season_no, "episodes": {}}) episode = season["episodes"].setdefault(str(episode_no), { "season": season_no, "episode": episode_no, "title": f"S{season_no:02d}E{episode_no:02d}" if season_no and episode_no else item["name"], "files": [], "status": "present", }) episode["files"].append(item) else: key = identity["key"] if not identity.get("year"): existing_key = next((candidate_key for candidate_key, candidate in movies.items() if candidate.get("slug") == identity["slug"]), None) if existing_key: key = existing_key elif key not in movies: no_year_key = merge_key("movie", identity["title"], None) if no_year_key in movies: movies[key] = movies.pop(no_year_key) movies[key]["key"] = key movie = movies.setdefault(key, { "key": key, "title": identity["title"], "year": identity.get("year"), "slug": identity.get("slug"), "library": "movie", "files": [], "versions": [], "metadata": {"title": identity["title"], "source": "filename"}, }) movie["files"].append(item) movie["versions"].append(file_version(item)) if not movie.get("year") and identity.get("year"): movie["year"] = identity.get("year") if enrich and config: workers = int(config.get("app", {}).get("metadata_parallelism", 8)) tasks = {} with ThreadPoolExecutor(max_workers=max(1, min(workers, 12))) as executor: for movie in movies.values(): future = executor.submit(movie_metadata, config, movie["title"], movie.get("year")) tasks[future] = movie for show in series.values(): present_seasons = {int(season) for season in show["seasons"] if int(season) > 0} future = executor.submit(series_metadata, config, show["title"], present_seasons) tasks[future] = show for future in as_completed(tasks): try: tasks[future]["metadata"] = future.result() except Exception: pass today = time.strftime("%Y-%m-%d") for show in series.values(): for season_no, season_meta in show.get("metadata", {}).get("seasons", {}).items(): season = show["seasons"].setdefault(season_no, {"season": int(season_no), "episodes": {}}) for meta_episode in season_meta.get("episodes", []): key = str(meta_episode.get("episode") or 0) existing = season["episodes"].get(key) if existing: existing.update({ "title": meta_episode.get("title") or existing["title"], "air_date": meta_episode.get("air_date"), "overview": meta_episode.get("overview"), "still": meta_episode.get("still"), }) else: air_date = meta_episode.get("air_date") season["episodes"][key] = { **meta_episode, "files": [], "status": "upcoming" if air_date and air_date > today else "missing", } for season in show["seasons"].values(): season["episodes"] = sorted(season["episodes"].values(), key=lambda ep: ep.get("episode") or 0) show["seasons"] = sorted(show["seasons"].values(), key=lambda season: season["season"]) return { "movies": sorted(movies.values(), key=lambda movie: movie["title"].lower()), "series": sorted(series.values(), key=lambda show: show["title"].lower()), } def library_snapshot(config: dict) -> dict: items = [] extensions = Counter() ignored_dirs = {"$RECYCLE.BIN", "System Volume Information", ".Trash-1000"} app = config["app"] max_files = int(app.get("library_scan_max_files", 20000)) deadline = time.monotonic() + int(app.get("library_scan_timeout_seconds", 8)) scanned = 0 truncated = False for drive in config.get("drives", []): if scanned >= max_files or time.monotonic() >= deadline: truncated = True break root = Path(drive["path"]) if not root.exists(): continue for library_root in library_roots(root): kind = library_kind(library_root) for current, dirs, files in os.walk(library_root, onerror=lambda error: None): if scanned >= max_files or time.monotonic() >= deadline: truncated = True break dirs[:] = [name for name in dirs if name not in ignored_dirs] lower_files = {name.lower() for name in files} for filename in files: if scanned >= max_files or time.monotonic() >= deadline: truncated = True break path = Path(current) / filename try: stat = path.stat() except OSError: continue scanned += 1 extensions[path.suffix.lower() or "none"] += 1 if path.suffix.lower() in app.get("media_extensions", []): if is_extra_media(path, library_root, kind, app): continue subtitle_names = [ f"{path.stem}{ext}".lower() for ext in app.get("subtitle_extensions", []) ] items.append({ "path": str(path), "name": path.name, "drive": drive["id"], "library": kind, "root": library_root.name, "size": stat.st_size, "modified": stat.st_mtime, "has_subtitles": any(name in lower_files for name in subtitle_names), }) enrich_limit = int(app.get("library_metadata_enrich_max_items", 500)) should_enrich = bool(config.get("metadata", {}).get("tmdb_enabled", True)) and len(items) <= enrich_limit return normalize_library({ "drives": drive_stats(config), "items": sorted(items, key=lambda item: item["modified"], reverse=True), "collections": build_collections(config, items, enrich=should_enrich), "extensions": dict(extensions.most_common()), "scanned_files": scanned, "truncated": truncated, "metadata_enriched": should_enrich, })