# 368 lines, 14 KiB, Python
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import time
|
|
from collections import Counter
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from pathlib import Path
|
|
|
|
from .metadata import movie_metadata, series_metadata
|
|
from .parser import clean_title, parse_media
|
|
from .storage import drive_stats
|
|
|
|
|
|
# Lower-cased folder names that mark a TV library root.
TV_ROOT_NAMES = {"shows", "tv", "tv shows"}

# Lower-cased folder names that mark any media library root (movies + TV).
LIBRARY_ROOT_NAMES = {"movies", *TV_ROOT_NAMES}

# "S01E02"-style marker; group 1 is the season, group 2 the episode.
EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})")

# "Season 3" / "season_03" folder names; group 1 is the season number.
SEASON_FOLDER_RE = re.compile(r"season[ ._-]*(\d{1,2})", re.IGNORECASE)

# A 19xx/20xx year wrapped in parentheses, e.g. "(1995)".
YEAR_RE = re.compile(r"\((19\d{2}|20\d{2})\)")

# A free-standing 19xx/20xx year anywhere in the text.
ANY_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")

# Release/version tags (resolution, source, codec, HDR, edition) found in
# file and folder names. The pieces below concatenate to a single pattern.
VERSION_RE = re.compile(
    r"\b("
    r"2160p|1080p|720p|480p"
    r"|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip"
    r"|x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|av1"
    r"|hdr10?|dv"
    r"|proper|repack|extended|unrated|directors?[ ._-]?cut|theatrical|imax"
    r")\b",
    re.IGNORECASE,
)

# Lower-cased sub-folder names whose contents count as extras, not features.
EXTRA_FOLDER_NAMES = {
    "behind the scenes",
    "deleted scenes",
    "extras",
    "featurettes",
    "interviews",
    "samples",
    "scenes",
    "shorts",
    "trailers",
}
|
|
|
|
|
|
def library_roots(root: Path) -> list[Path]:
    """Return the direct sub-directories of *root* that are media libraries.

    A child qualifies when it is a directory whose lower-cased name is in
    ``LIBRARY_ROOT_NAMES``. If *root* cannot be listed (``OSError``), an
    empty list is returned instead of raising.
    """
    try:
        entries = list(root.iterdir())
    except OSError:
        # Missing or unreadable drive root: nothing to report.
        return []
    return [
        entry
        for entry in entries
        if entry.is_dir() and entry.name.lower() in LIBRARY_ROOT_NAMES
    ]
|
|
|
|
|
|
def library_kind(library_root: Path) -> str:
    """Classify a library root folder as ``"tv"`` or ``"movie"``.

    The folder name is compared case-insensitively against
    ``TV_ROOT_NAMES``; anything else is treated as a movie library.
    """
    folder = library_root.name.lower()
    if folder in TV_ROOT_NAMES:
        return "tv"
    return "movie"
|
|
|
|
|
|
def infer_library_kind(path: str) -> str:
    """Guess the library kind from the folder names that appear in *path*.

    Returns ``"tv"`` if any lower-cased path component is a TV root name,
    otherwise ``"movie"`` if a component is ``"movies"``, otherwise
    ``"other"``. TV names take precedence when both are present.
    """
    components = {component.lower() for component in Path(path).parts}
    if not components.isdisjoint(TV_ROOT_NAMES):
        return "tv"
    return "movie" if "movies" in components else "other"
|
|
|
|
|
|
def split_library_path(path: str) -> tuple[str, list[str]]:
    """Split *path* at its library root folder.

    Returns ``(root_name, remainder)`` where ``root_name`` is the first path
    component (in its original casing) whose lower-cased form is in
    ``LIBRARY_ROOT_NAMES`` and ``remainder`` is the list of components after
    it. When no component is a library root, returns ``("", all_parts)``.

    The path is scanned left to right so the result is deterministic even
    when several components are library root names (e.g. a show folder
    called "Movies" inside a "TV" library); iterating the name set instead
    would make the choice depend on set ordering.
    """
    parts = list(Path(path).parts)
    for idx, part in enumerate(parts):
        if part.lower() in LIBRARY_ROOT_NAMES:
            return part, parts[idx + 1:]
    return "", parts
|
|
|
|
|
|
def identity_slug(title: str) -> str:
    """Return a comparison slug for *title*.

    The title is lower-cased, every run of characters outside ``a-z0-9`` is
    replaced by a single space, and surrounding spaces are removed.
    """
    lowered = title.lower()
    spaced = re.sub(r"[^a-z0-9]+", " ", lowered)
    return spaced.strip()
|
|
|
|
|
|
def clean_collection_title(name: str) -> tuple[str, int | None]:
    """Derive a display title and release year from a folder or file name.

    The year is the first free-standing 19xx/20xx number in *name*
    (``None`` when there is none). The title is whatever the project's
    ``clean_title`` helper returns for the same name.
    """
    found = ANY_YEAR_RE.search(name)
    year = None if found is None else int(found.group(1))
    return clean_title(name), year
|
|
|
|
|
|
def merge_key(kind: str, title: str, year: int | None = None) -> str:
    """Build the key under which items of the same work are merged.

    Movies are keyed by slug and year (``movie::<slug>::<year>``, with an
    empty year part when *year* is falsy). Every other kind is keyed as a
    series by slug only (``tv::<slug>``).
    """
    slug = identity_slug(title)
    if kind != "movie":
        return f"tv::{slug}"
    return f"movie::{slug}::{year or ''}"
|
|
|
|
|
|
def file_version(item: dict) -> dict:
    """Summarise one movie file as a "version" record.

    Release tags are extracted with ``VERSION_RE`` from the file's parent
    folder name and its stem. Dots and underscores inside a tag become
    spaces, and duplicates are dropped case-insensitively with the first
    spelling kept.

    Returns a dict with the item's ``path``, ``name``, ``drive`` and
    ``size`` (0 when missing or falsy), a ``quality`` string (the first
    resolution tag, or ``""``) and at most eight ``tags``.
    """
    path = Path(item.get("path", ""))
    haystack = " ".join(filter(None, (path.parent.name, path.stem)))

    tags = []
    seen = set()
    for found in VERSION_RE.finditer(haystack):
        raw = found.group(1).replace(".", " ").replace("_", " ")
        tag = re.sub(r"\s+", " ", raw).strip()
        folded = tag.lower()
        if folded in seen:
            continue
        seen.add(folded)
        tags.append(tag)

    resolutions = {"2160p", "1080p", "720p", "480p"}
    quality = next((tag for tag in tags if tag.lower() in resolutions), "")

    return {
        "path": item.get("path"),
        "name": item.get("name"),
        "drive": item.get("drive"),
        "size": item.get("size") or 0,
        "quality": quality,
        "tags": tags[:8],
    }
|
|
|
|
|
|
def is_extra_media(path: Path, library_root: Path, kind: str, app: dict) -> bool:
    """Tell whether *path* is bonus material rather than a main feature.

    Two checks are made, with underscores and dots treated as spaces and
    case ignored:

    * For movie libraries, any folder below the first one under
      *library_root* (the first is skipped) named in ``EXTRA_FOLDER_NAMES``
      marks the file as an extra.
    * For every kind, a non-empty entry of ``app["extra_keywords"]`` that
      occurs in the file name marks it as an extra.

    If *path* is not located under *library_root*, the whole path is used
    for the folder check.
    """
    def _normalise(text: str) -> str:
        return text.lower().replace("_", " ").replace(".", " ")

    try:
        relative = path.relative_to(library_root)
    except ValueError:
        relative = path

    if kind == "movie":
        folders = [_normalise(part) for part in relative.parts[:-1]]
        for folder in folders[1:]:
            if folder in EXTRA_FOLDER_NAMES:
                return True

    filename = _normalise(path.name)
    for keyword in app.get("extra_keywords", []):
        if keyword and keyword.lower() in filename:
            return True
    return False
|
|
|
|
|
|
def item_identity(item: dict) -> dict:
    """Work out which movie or series a scanned item belongs to.

    The item's ``library`` field decides the kind, falling back to
    ``infer_library_kind`` on its path. The first path component below the
    library root supplies the title.

    * TV items with at least one component below the root return
      ``kind``, ``root``, ``title``, ``key``, ``season`` and ``episode``.
      Season and episode come from ``parse_media``; when that gives no
      season, a "Season N" path component is used instead.
    * Everything else is treated as a movie and returns ``kind``, ``root``,
      ``title``, ``year``, ``slug`` and ``key``. The year comes from the
      folder name, falling back to the parsed year.

    NOTE(review): ``parse_media`` and ``clean_title`` are project helpers;
    this assumes ``parse_media`` returns a mapping that includes ``"title"``.
    """
    item_path = item.get("path", "")
    root, rel = split_library_path(item_path)
    kind = item.get("library") or infer_library_kind(item_path)
    parsed = parse_media(item.get("path", item.get("name", "")))

    if kind == "tv" and rel:
        title = clean_title(rel[0])
        season = parsed.get("season")
        if not season:
            # Fall back to the first "Season N" component with a non-zero N;
            # a zero match is kept only if nothing better follows.
            for part in rel:
                found = SEASON_FOLDER_RE.search(part)
                if found:
                    season = int(found.group(1))
                    if season:
                        break
        return {
            "kind": "tv",
            "root": root,
            "title": title,
            "key": merge_key("tv", title),
            "season": season,
            "episode": parsed.get("episode"),
        }

    source_name = rel[0] if rel else parsed["title"]
    title, year = clean_collection_title(source_name)
    year = year or parsed.get("year")
    return {
        "kind": "movie",
        "root": root,
        "title": title,
        "year": year,
        "slug": identity_slug(title),
        "key": merge_key("movie", title, year),
    }
|
|
|
|
|
|
def normalize_library(library: dict) -> dict:
    """Fill in derived fields on a library dict and return it.

    Mutates *library* in place:

    * every item gets a ``library`` kind (kept if already set, otherwise
      inferred from its path);
    * ``counts`` is set to the number of movie items, TV items and all items;
    * ``collections`` is built from the items when the key is absent.
    """
    items = library.get("items", [])
    tally = Counter()
    for entry in items:
        kind = entry.get("library") or infer_library_kind(entry.get("path", ""))
        entry["library"] = kind
        if kind in ("movie", "tv"):
            tally[kind] += 1

    library["counts"] = {
        "movies": tally["movie"],
        "tv": tally["tv"],
        "total": len(items),
    }
    if "collections" not in library:
        library["collections"] = build_collections({}, items)
    return library
|
|
|
|
|
|
def build_collections(config: dict, items: list[dict], enrich: bool = False) -> dict:
    """Group scanned media items into movie and series collections.

    Args:
        config: Application configuration. Only used when *enrich* is true:
            it is passed to the metadata lookups, and
            ``config["app"]["metadata_parallelism"]`` (default 8, clamped to
            1..12) sets the number of lookup threads.
        items: Scanned item dicts; each is identified with ``item_identity``.
        enrich: When true and *config* is non-empty, replace each
            collection's filename-derived metadata with the result of
            ``movie_metadata`` / ``series_metadata``.

    Returns:
        ``{"movies": [...], "series": [...]}``, both sorted by lower-cased
        title. Each series has ``seasons`` as a list sorted by season number,
        and each season has ``episodes`` as a list sorted by episode number.
        Episodes known only from metadata are added with no files and a
        status of ``"upcoming"`` (air date after today) or ``"missing"``.

    A failed metadata lookup is logged and the collection keeps its
    filename-derived metadata; it never aborts the build.
    """
    import logging  # function-scope import: no file-level dependency change

    movies: dict[str, dict] = {}
    series: dict[str, dict] = {}
    for item in items:
        identity = item_identity(item)
        if identity["kind"] == "tv":
            show = series.setdefault(identity["key"], {
                "key": identity["key"],
                "title": identity["title"],
                "library": "tv",
                "files": [],
                "seasons": {},
                "metadata": {"title": identity["title"], "source": "filename", "seasons": {}},
            })
            show["files"].append(item)
            # Unknown season/episode numbers are bucketed under 0.
            season_no = identity.get("season") or 0
            episode_no = identity.get("episode") or 0
            season = show["seasons"].setdefault(str(season_no), {"season": season_no, "episodes": {}})
            episode = season["episodes"].setdefault(str(episode_no), {
                "season": season_no,
                "episode": episode_no,
                "title": f"S{season_no:02d}E{episode_no:02d}" if season_no and episode_no else item["name"],
                "files": [],
                "status": "present",
            })
            episode["files"].append(item)
        else:
            key = identity["key"]
            if not identity.get("year"):
                # No year on this file: attach it to an already-seen movie
                # with the same slug, whatever year that one carries.
                existing_key = next(
                    (
                        candidate_key
                        for candidate_key, candidate in movies.items()
                        if candidate.get("slug") == identity["slug"]
                    ),
                    None,
                )
                if existing_key:
                    key = existing_key
            elif key not in movies:
                # First dated file for this title: adopt any year-less entry
                # created earlier by re-keying it under the dated key.
                no_year_key = merge_key("movie", identity["title"], None)
                if no_year_key in movies:
                    movies[key] = movies.pop(no_year_key)
                    movies[key]["key"] = key
            movie = movies.setdefault(key, {
                "key": key,
                "title": identity["title"],
                "year": identity.get("year"),
                "slug": identity.get("slug"),
                "library": "movie",
                "files": [],
                "versions": [],
                "metadata": {"title": identity["title"], "source": "filename"},
            })
            movie["files"].append(item)
            movie["versions"].append(file_version(item))
            if not movie.get("year") and identity.get("year"):
                movie["year"] = identity.get("year")

    if enrich and config:
        workers = int(config.get("app", {}).get("metadata_parallelism", 8))
        tasks = {}
        with ThreadPoolExecutor(max_workers=max(1, min(workers, 12))) as executor:
            for movie in movies.values():
                future = executor.submit(movie_metadata, config, movie["title"], movie.get("year"))
                tasks[future] = movie
            for show in series.values():
                # Season 0 is the "unknown" bucket, not a real season.
                present_seasons = {int(season) for season in show["seasons"] if int(season) > 0}
                future = executor.submit(series_metadata, config, show["title"], present_seasons)
                tasks[future] = show
            for future in as_completed(tasks):
                target = tasks[future]
                try:
                    result = future.result()
                except Exception as exc:
                    # Best effort: keep the filename metadata, but leave a
                    # trace instead of swallowing the failure silently.
                    logging.getLogger(__name__).warning(
                        "Metadata lookup failed for %r: %s", target.get("title"), exc
                    )
                    continue
                target["metadata"] = result

    today = time.strftime("%Y-%m-%d")
    for show in series.values():
        # Tolerate missing/None metadata or seasons rather than crashing.
        season_metadata = (show.get("metadata") or {}).get("seasons") or {}
        for season_no, season_meta in season_metadata.items():
            # On-disk seasons are keyed by str; normalise so metadata keyed
            # by int merges into the same season instead of duplicating it.
            season = show["seasons"].setdefault(
                str(season_no), {"season": int(season_no), "episodes": {}}
            )
            for meta_episode in season_meta.get("episodes") or []:
                key = str(meta_episode.get("episode") or 0)
                existing = season["episodes"].get(key)
                if existing:
                    existing.update({
                        "title": meta_episode.get("title") or existing["title"],
                        "air_date": meta_episode.get("air_date"),
                        "overview": meta_episode.get("overview"),
                        "still": meta_episode.get("still"),
                    })
                else:
                    air_date = meta_episode.get("air_date")
                    season["episodes"][key] = {
                        **meta_episode,
                        "files": [],
                        # String comparison; assumes YYYY-MM-DD air dates.
                        "status": "upcoming" if air_date and air_date > today else "missing",
                    }
        # Convert the keyed working dicts into sorted lists for output.
        for season in show["seasons"].values():
            season["episodes"] = sorted(season["episodes"].values(), key=lambda ep: ep.get("episode") or 0)
        show["seasons"] = sorted(show["seasons"].values(), key=lambda season: season["season"])

    return {
        "movies": sorted(movies.values(), key=lambda movie: movie["title"].lower()),
        "series": sorted(series.values(), key=lambda show: show["title"].lower()),
    }
|
|
|
|
|
|
def preserve_metadata(collections: dict, previous_library: dict | None) -> dict:
    """Carry TMDB metadata over from a previous library snapshot.

    Collections in *collections* are matched by ``key`` against the movies
    and series of ``previous_library["collections"]``. When the previous
    entry's metadata has ``source == "tmdb"`` it replaces the new entry's
    metadata. If that metadata is also flagged ``manual``, the entry's title
    is taken from it (when it has one), and for movies with a
    ``release_date`` the year is set from its first four characters.

    *collections* is modified in place and returned. A missing or empty
    *previous_library* leaves it untouched.
    """
    previous = (previous_library or {}).get("collections") or {}

    lookup = {}
    for group in ("movies", "series"):
        for entry in previous.get(group, []):
            entry_key = entry.get("key")
            if entry_key:
                lookup[entry_key] = entry

    for group in ("movies", "series"):
        for item in collections.get(group, []):
            old = lookup.get(item.get("key"))
            old_meta = (old or {}).get("metadata") or {}
            if old_meta.get("source") != "tmdb":
                continue
            item["metadata"] = old_meta
            if not old_meta.get("manual"):
                continue
            # A manual identification also overrides title and year.
            item["title"] = old_meta.get("title") or item.get("title")
            if item.get("library") == "movie" and old_meta.get("release_date"):
                item["year"] = int(old_meta["release_date"][:4])
    return collections
|
|
|
|
|
|
def library_snapshot(config: dict, previous_library: dict | None = None) -> dict:
    """Scan the configured drives and build a fresh library snapshot.

    For each entry of ``config["drives"]`` the library roots directly under
    its ``path`` are walked. Every file that can be stat'ed is counted by
    extension. Files whose extension is in ``app["media_extensions"]`` and
    which are not extras become items, each flagged with whether a
    same-stem subtitle file sits next to it.

    The scan stops once ``app["library_scan_max_files"]`` files (default
    20000) were counted or ``app["library_scan_timeout_seconds"]`` (default
    8) elapsed; the snapshot is then marked ``truncated``.

    Metadata enrichment runs only when ``config["metadata"]["tmdb_enabled"]``
    (default true) is set and there are at most
    ``app["library_metadata_enrich_max_items"]`` items (default 500).
    Otherwise TMDB metadata is carried over from *previous_library*.

    Returns the normalised library dict. Its keys include ``drives``,
    ``items`` (newest first), ``collections``, ``extensions``,
    ``scanned_files``, ``truncated``, ``metadata_enriched`` and
    ``identifications``, the last copied from *previous_library*.
    """
    items = []
    extensions = Counter()
    skip_dirs = {"$RECYCLE.BIN", "System Volume Information", ".Trash-1000"}
    app = config["app"]
    max_files = int(app.get("library_scan_max_files", 20000))
    deadline = time.monotonic() + int(app.get("library_scan_timeout_seconds", 8))
    media_extensions = app.get("media_extensions", [])
    subtitle_extensions = app.get("subtitle_extensions", [])
    scanned = 0
    truncated = False

    def _budget_spent() -> bool:
        # Reads the live value of ``scanned`` from the enclosing scope.
        return scanned >= max_files or time.monotonic() >= deadline

    for drive in config.get("drives", []):
        if _budget_spent():
            truncated = True
            break
        root = Path(drive["path"])
        if not root.exists():
            continue
        for library_root in library_roots(root):
            kind = library_kind(library_root)
            # Walk errors are ignored so one unreadable folder cannot abort the scan.
            for current, dirs, files in os.walk(library_root, onerror=lambda error: None):
                if _budget_spent():
                    truncated = True
                    break
                # Prune in place so os.walk does not descend into system folders.
                dirs[:] = [name for name in dirs if name not in skip_dirs]
                lower_files = {name.lower() for name in files}
                for filename in files:
                    if _budget_spent():
                        truncated = True
                        break
                    path = Path(current) / filename
                    try:
                        stat = path.stat()
                    except OSError:
                        continue
                    scanned += 1
                    suffix = path.suffix.lower()
                    extensions[suffix or "none"] += 1
                    if suffix not in media_extensions:
                        continue
                    if is_extra_media(path, library_root, kind, app):
                        continue
                    subtitle_names = [f"{path.stem}{ext}".lower() for ext in subtitle_extensions]
                    has_subtitles = any(name in lower_files for name in subtitle_names)
                    items.append({
                        "path": str(path),
                        "name": path.name,
                        "drive": drive["id"],
                        "library": kind,
                        "root": library_root.name,
                        "size": stat.st_size,
                        "modified": stat.st_mtime,
                        "has_subtitles": has_subtitles,
                    })

    enrich_limit = int(app.get("library_metadata_enrich_max_items", 500))
    tmdb_enabled = bool(config.get("metadata", {}).get("tmdb_enabled", True))
    should_enrich = tmdb_enabled and len(items) <= enrich_limit
    collections = build_collections(config, items, enrich=should_enrich)
    if not should_enrich:
        collections = preserve_metadata(collections, previous_library)

    return normalize_library({
        "drives": drive_stats(config),
        "items": sorted(items, key=lambda item: item["modified"], reverse=True),
        "collections": collections,
        "extensions": dict(extensions.most_common()),
        "scanned_files": scanned,
        "truncated": truncated,
        "metadata_enriched": should_enrich,
        "identifications": (previous_library or {}).get("identifications", {}),
    })
|
|
|
|
|
|
def enrich_library_metadata(config: dict, library: dict) -> dict:
    """Return a copy of *library* with its collections rebuilt and enriched.

    The collections are rebuilt from the library's items with metadata
    enrichment switched on. The copy is marked ``metadata_enriched``,
    stamped with the current time in ``metadata_refreshed_at`` and passed
    through ``normalize_library``. The top-level *library* dict itself is
    not modified, but the copy is shallow and shares its item dicts.
    """
    items = library.get("items") or []
    enriched = dict(library)
    enriched["collections"] = build_collections(config, items, enrich=True)
    enriched["metadata_enriched"] = True
    enriched["metadata_refreshed_at"] = time.time()
    return normalize_library(enriched)
|