Initial commit

2026-05-15 02:41:52 +00:00
commit e2de5f705a
73 changed files with 9965 additions and 0 deletions
--- a/backend/sortarr/library.py
+++ b/backend/sortarr/library.py
@@ -0,0 +1,331 @@
+from __future__ import annotations
+
+import os
+import re
+import time
+from collections import Counter
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+from .metadata import movie_metadata, series_metadata
+from .parser import clean_title, parse_media
+from .storage import drive_stats
+
+
+# Lower-cased folder names that mark a library root directly under a drive.
+LIBRARY_ROOT_NAMES = {"movies", "shows", "tv", "tv shows"}
+# Root names that denote a TV library; library_kind treats every other root as movies.
+TV_ROOT_NAMES = {"shows", "tv", "tv shows"}
+# "S01E02"-style marker: 1-2 season digits and 1-3 episode digits, optionally
+# separated by spaces, dots, underscores or hyphens.
+EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})")
+# "Season 1" / "season_02"-style text (case-insensitive); group 1 is the season number.
+SEASON_FOLDER_RE = re.compile(r"season[ ._-]*(\d{1,2})", re.I)
+# A 19xx/20xx year wrapped in parentheses, e.g. "(1999)".
+YEAR_RE = re.compile(r"\((19\d{2}|20\d{2})\)")
+# A standalone 19xx/20xx year anywhere in the text.
+ANY_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
+# Release tokens (resolution, source, codec, HDR, edition) matched
+# case-insensitively as whole words.
+VERSION_RE = re.compile(r"\b(2160p|1080p|720p|480p|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip|x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|av1|hdr10?|dv|proper|repack|extended|unrated|directors?[ ._-]?cut|theatrical|imax)\b", re.I)
+# Folder names (lower-case, space-separated) that is_extra_media treats as
+# bonus content when they appear below a movie's own folder.
+EXTRA_FOLDER_NAMES = {
+    "behind the scenes",
+    "deleted scenes",
+    "extras",
+    "featurettes",
+    "interviews",
+    "samples",
+    "scenes",
+    "shorts",
+    "trailers",
+}
+
+
+def library_roots(root: Path) -> list[Path]:
+    matches = []
+    try:
+        children = list(root.iterdir())
+    except OSError:
+        return matches
+    for child in children:
+        if child.is_dir() and child.name.lower() in LIBRARY_ROOT_NAMES:
+            matches.append(child)
+    return matches
+
+
+def library_kind(library_root: Path) -> str:
+    return "tv" if library_root.name.lower() in TV_ROOT_NAMES else "movie"
+
+
+def infer_library_kind(path: str) -> str:
+    parts = {part.lower() for part in Path(path).parts}
+    if parts & TV_ROOT_NAMES:
+        return "tv"
+    if "movies" in parts:
+        return "movie"
+    return "other"
+
+
+def split_library_path(path: str) -> tuple[str, list[str]]:
+    parts = list(Path(path).parts)
+    lowered = [part.lower() for part in parts]
+    for root in LIBRARY_ROOT_NAMES:
+        if root in lowered:
+            idx = lowered.index(root)
+            return parts[idx], parts[idx + 1:]
+    return "", parts
+
+
+def identity_slug(title: str) -> str:
+    return re.sub(r"[^a-z0-9]+", " ", title.lower()).strip()
+
+
+def clean_collection_title(name: str) -> tuple[str, int | None]:
+    year_match = ANY_YEAR_RE.search(name)
+    year = int(year_match.group(1)) if year_match else None
+    title = clean_title(name)
+    return title, year
+
+
+def merge_key(kind: str, title: str, year: int | None = None) -> str:
+    slug = identity_slug(title)
+    if kind == "movie":
+        return f"movie::{slug}::{year or ''}"
+    return f"tv::{slug}"
+
+
+def file_version(item: dict) -> dict:
+    path = Path(item.get("path", ""))
+    text = " ".join(part for part in [path.parent.name, path.stem] if part)
+    tags = []
+    for match in VERSION_RE.finditer(text):
+        tag = match.group(1).replace(".", " ").replace("_", " ")
+        normalized = re.sub(r"\s+", " ", tag).strip()
+        if normalized.lower() not in {existing.lower() for existing in tags}:
+            tags.append(normalized)
+    return {
+        "path": item.get("path"),
+        "name": item.get("name"),
+        "drive": item.get("drive"),
+        "size": item.get("size") or 0,
+        "quality": next((tag for tag in tags if tag.lower() in {"2160p", "1080p", "720p", "480p"}), ""),
+        "tags": tags[:8],
+    }
+
+
+def is_extra_media(path: Path, library_root: Path, kind: str, app: dict) -> bool:
+    try:
+        relative = path.relative_to(library_root)
+    except ValueError:
+        relative = path
+    parts = [part.lower().replace("_", " ").replace(".", " ") for part in relative.parts[:-1]]
+    if kind == "movie" and any(part in EXTRA_FOLDER_NAMES for part in parts[1:]):
+        return True
+    lowered_name = path.name.lower().replace("_", " ").replace(".", " ")
+    return any(keyword and keyword.lower() in lowered_name for keyword in app.get("extra_keywords", []))
+
+
+def item_identity(item: dict) -> dict:
+    root, rel = split_library_path(item.get("path", ""))
+    kind = item.get("library") or infer_library_kind(item.get("path", ""))
+    parsed = parse_media(item.get("path", item.get("name", "")))
+    if kind == "tv" and rel:
+        title = clean_title(rel[0])
+        season = parsed.get("season")
+        episode = parsed.get("episode")
+        for part in rel:
+            match = SEASON_FOLDER_RE.search(part)
+            if match and not season:
+                season = int(match.group(1))
+        return {
+            "kind": "tv",
+            "root": root,
+            "title": title,
+            "key": merge_key("tv", title),
+            "season": season,
+            "episode": episode,
+        }
+    title, year = clean_collection_title(rel[0] if rel else parsed["title"])
+    year = year or parsed.get("year")
+    return {
+        "kind": "movie",
+        "root": root,
+        "title": title,
+        "year": year,
+        "slug": identity_slug(title),
+        "key": merge_key("movie", title, year),
+    }
+
+
+def normalize_library(library: dict) -> dict:
+    items = library.get("items", [])
+    kinds = Counter()
+    for item in items:
+        kind = item.get("library") or infer_library_kind(item.get("path", ""))
+        item["library"] = kind
+        if kind in {"movie", "tv"}:
+            kinds[kind] += 1
+    library["counts"] = {
+        "movies": kinds.get("movie", 0),
+        "tv": kinds.get("tv", 0),
+        "total": len(items),
+    }
+    if "collections" not in library:
+        library["collections"] = build_collections({}, items)
+    return library
+
+
+def build_collections(config: dict, items: list[dict], enrich: bool = False) -> dict:
+    movies: dict[str, dict] = {}
+    series: dict[str, dict] = {}
+    for item in items:
+        identity = item_identity(item)
+        if identity["kind"] == "tv":
+            show = series.setdefault(identity["key"], {
+                "key": identity["key"],
+                "title": identity["title"],
+                "library": "tv",
+                "files": [],
+                "seasons": {},
+                "metadata": {"title": identity["title"], "source": "filename", "seasons": {}},
+            })
+            show["files"].append(item)
+            season_no = identity.get("season") or 0
+            episode_no = identity.get("episode") or 0
+            season = show["seasons"].setdefault(str(season_no), {"season": season_no, "episodes": {}})
+            episode = season["episodes"].setdefault(str(episode_no), {
+                "season": season_no,
+                "episode": episode_no,
+                "title": f"S{season_no:02d}E{episode_no:02d}" if season_no and episode_no else item["name"],
+                "files": [],
+                "status": "present",
+            })
+            episode["files"].append(item)
+        else:
+            key = identity["key"]
+            if not identity.get("year"):
+                existing_key = next((candidate_key for candidate_key, candidate in movies.items() if candidate.get("slug") == identity["slug"]), None)
+                if existing_key:
+                    key = existing_key
+            elif key not in movies:
+                no_year_key = merge_key("movie", identity["title"], None)
+                if no_year_key in movies:
+                    movies[key] = movies.pop(no_year_key)
+                    movies[key]["key"] = key
+            movie = movies.setdefault(key, {
+                "key": key,
+                "title": identity["title"],
+                "year": identity.get("year"),
+                "slug": identity.get("slug"),
+                "library": "movie",
+                "files": [],
+                "versions": [],
+                "metadata": {"title": identity["title"], "source": "filename"},
+            })
+            movie["files"].append(item)
+            movie["versions"].append(file_version(item))
+            if not movie.get("year") and identity.get("year"):
+                movie["year"] = identity.get("year")
+
+    if enrich and config:
+        workers = int(config.get("app", {}).get("metadata_parallelism", 8))
+        tasks = {}
+        with ThreadPoolExecutor(max_workers=max(1, min(workers, 12))) as executor:
+            for movie in movies.values():
+                future = executor.submit(movie_metadata, config, movie["title"], movie.get("year"))
+                tasks[future] = movie
+            for show in series.values():
+                present_seasons = {int(season) for season in show["seasons"] if int(season) > 0}
+                future = executor.submit(series_metadata, config, show["title"], present_seasons)
+                tasks[future] = show
+            for future in as_completed(tasks):
+                try:
+                    tasks[future]["metadata"] = future.result()
+                except Exception:
+                    pass
+
+    today = time.strftime("%Y-%m-%d")
+    for show in series.values():
+        for season_no, season_meta in show.get("metadata", {}).get("seasons", {}).items():
+            season = show["seasons"].setdefault(season_no, {"season": int(season_no), "episodes": {}})
+            for meta_episode in season_meta.get("episodes", []):
+                key = str(meta_episode.get("episode") or 0)
+                existing = season["episodes"].get(key)
+                if existing:
+                    existing.update({
+                        "title": meta_episode.get("title") or existing["title"],
+                        "air_date": meta_episode.get("air_date"),
+                        "overview": meta_episode.get("overview"),
+                        "still": meta_episode.get("still"),
+                    })
+                else:
+                    air_date = meta_episode.get("air_date")
+                    season["episodes"][key] = {
+                        **meta_episode,
+                        "files": [],
+                        "status": "upcoming" if air_date and air_date > today else "missing",
+                    }
+        for season in show["seasons"].values():
+            season["episodes"] = sorted(season["episodes"].values(), key=lambda ep: ep.get("episode") or 0)
+        show["seasons"] = sorted(show["seasons"].values(), key=lambda season: season["season"])
+
+    return {
+        "movies": sorted(movies.values(), key=lambda movie: movie["title"].lower()),
+        "series": sorted(series.values(), key=lambda show: show["title"].lower()),
+    }
+
+
+def library_snapshot(config: dict) -> dict:
+    """Scan every configured drive and return a normalized library snapshot.
+
+    For each entry of ``config["drives"]`` the library root folders below the
+    drive path are walked and every file with a configured media extension
+    (and not classified as an extra) is recorded. The scan stops early once
+    ``app.library_scan_max_files`` files (default 20000) were examined or
+    ``app.library_scan_timeout_seconds`` seconds (default 8) have passed; the
+    result then carries ``truncated=True``.
+
+    Metadata enrichment is requested only when ``metadata.tmdb_enabled`` is
+    truthy (default ``True``) and the number of items does not exceed
+    ``app.library_metadata_enrich_max_items`` (default 500).
+
+    Returns:
+        The dict produced by ``normalize_library`` with the keys ``drives``,
+        ``items`` (newest modification time first), ``collections``,
+        ``extensions``, ``scanned_files``, ``truncated`` and
+        ``metadata_enriched`` plus the fields ``normalize_library`` adds.
+    """
+    items = []
+    extensions = Counter()
+    ignored_dirs = {"$RECYCLE.BIN", "System Volume Information", ".Trash-1000"}
+    app = config["app"]
+    max_files = int(app.get("library_scan_max_files", 20000))
+    deadline = time.monotonic() + int(app.get("library_scan_timeout_seconds", 8))
+    scanned = 0
+    truncated = False
+    for drive in config.get("drives", []):
+        if scanned >= max_files or time.monotonic() >= deadline:
+            truncated = True
+            break
+        root = Path(drive["path"])
+        if not root.exists():
+            continue
+        for library_root in library_roots(root):
+            kind = library_kind(library_root)
+            # Directory listing errors are ignored (onerror returns None).
+            for current, dirs, files in os.walk(library_root, onerror=lambda error: None):
+                if scanned >= max_files or time.monotonic() >= deadline:
+                    truncated = True
+                    break
+                # Prune in place so os.walk does not descend into ignored folders.
+                dirs[:] = [name for name in dirs if name not in ignored_dirs]
+                lower_files = {name.lower() for name in files}
+                for filename in files:
+                    if scanned >= max_files or time.monotonic() >= deadline:
+                        truncated = True
+                        break
+                    path = Path(current) / filename
+                    try:
+                        stat = path.stat()
+                    except OSError:
+                        continue
+                    # Counts every file that could be stat'ed, media or not.
+                    scanned += 1
+                    extensions[path.suffix.lower() or "none"] += 1
+                    if path.suffix.lower() in app.get("media_extensions", []):
+                        if is_extra_media(path, library_root, kind, app):
+                            continue
+                        # A subtitle is a sibling file named <stem><subtitle ext>,
+                        # compared case-insensitively.
+                        subtitle_names = [
+                            f"{path.stem}{ext}".lower()
+                            for ext in app.get("subtitle_extensions", [])
+                        ]
+                        items.append({
+                            "path": str(path),
+                            "name": path.name,
+                            "drive": drive["id"],
+                            "library": kind,
+                            "root": library_root.name,
+                            "size": stat.st_size,
+                            "modified": stat.st_mtime,
+                            "has_subtitles": any(name in lower_files for name in subtitle_names),
+                        })
+    enrich_limit = int(app.get("library_metadata_enrich_max_items", 500))
+    should_enrich = bool(config.get("metadata", {}).get("tmdb_enabled", True)) and len(items) <= enrich_limit
+    return normalize_library({
+        "drives": drive_stats(config),
+        "items": sorted(items, key=lambda item: item["modified"], reverse=True),
+        "collections": build_collections(config, items, enrich=should_enrich),
+        "extensions": dict(extensions.most_common()),
+        "scanned_files": scanned,
+        "truncated": truncated,
+        "metadata_enriched": should_enrich,
+    })