Initial commit
This commit is contained in:
331
backend/sortarr/library.py
Normal file
331
backend/sortarr/library.py
Normal file
@@ -0,0 +1,331 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from collections import Counter
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
from .metadata import movie_metadata, series_metadata
|
||||
from .parser import clean_title, parse_media
|
||||
from .storage import drive_stats
|
||||
|
||||
|
||||
# Lower-cased folder names that mark the root of a TV library.
TV_ROOT_NAMES = {"shows", "tv", "tv shows"}
# Every recognised library root: the movie root plus all TV roots.
LIBRARY_ROOT_NAMES = {"movies", *TV_ROOT_NAMES}

# "S01E02"-style marker; season and episode digits are captured separately.
EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})")
# "Season 1", "season_01", ... (case-insensitive); captures the season digits.
SEASON_FOLDER_RE = re.compile(r"season[ ._-]*(\d{1,2})", re.I)
# A 19xx/20xx year wrapped in parentheses, e.g. "(1999)".
YEAR_RE = re.compile(r"\((19\d{2}|20\d{2})\)")
# A free-standing 19xx/20xx year anywhere in the text.
ANY_YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
# Release/version tokens (resolution, source, codec, HDR, edition), case-insensitive.
VERSION_RE = re.compile(
    r"\b("
    r"2160p|1080p|720p|480p"
    r"|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip"
    r"|x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|av1"
    r"|hdr10?|dv"
    r"|proper|repack|extended|unrated|directors?[ ._-]?cut|theatrical|imax"
    r")\b",
    re.I,
)

# Lower-cased sub-folder names whose contents are treated as extras.
EXTRA_FOLDER_NAMES = {
    "behind the scenes", "deleted scenes", "extras",
    "featurettes", "interviews", "samples",
    "scenes", "shorts", "trailers",
}
|
||||
|
||||
|
||||
def library_roots(root: Path) -> list[Path]:
    """Return the direct children of ``root`` that are library folders.

    A child qualifies when it is a directory whose lower-cased name appears
    in ``LIBRARY_ROOT_NAMES``. When ``root`` cannot be listed (``OSError``)
    an empty list is returned instead of raising.
    """
    try:
        entries = list(root.iterdir())
    except OSError:
        return []
    return [
        entry
        for entry in entries
        if entry.is_dir() and entry.name.lower() in LIBRARY_ROOT_NAMES
    ]
|
||||
|
||||
|
||||
def library_kind(library_root: Path) -> str:
    """Classify a library root folder as ``"tv"`` or ``"movie"`` by its name.

    Any folder whose lower-cased name is not in ``TV_ROOT_NAMES`` is treated
    as a movie library.
    """
    if library_root.name.lower() in TV_ROOT_NAMES:
        return "tv"
    return "movie"
|
||||
|
||||
|
||||
def infer_library_kind(path: str) -> str:
    """Guess the library kind from the folder names that make up ``path``.

    Returns ``"tv"`` when any component (case-insensitively) is a TV root
    name, otherwise ``"movie"`` when a ``movies`` component is present, and
    ``"other"`` when neither applies. TV names take precedence over
    ``movies``.
    """
    lowered_components = {component.lower() for component in Path(path).parts}
    if not lowered_components.isdisjoint(TV_ROOT_NAMES):
        return "tv"
    return "movie" if "movies" in lowered_components else "other"
|
||||
|
||||
|
||||
def split_library_path(path: str) -> tuple[str, list[str]]:
    """Split ``path`` at its library root folder.

    Returns ``(root_name, remaining_parts)`` where ``root_name`` is the path
    component (original casing) that matches ``LIBRARY_ROOT_NAMES``
    case-insensitively and ``remaining_parts`` are the components after it.
    When no component matches, returns ``("", all_parts)``.

    The components are scanned in path order so the outermost matching
    folder always wins. Iterating the set of root names instead would make
    the result depend on set iteration order whenever a path contains more
    than one root name (for example ``/mnt/movies/TV/...``).
    """
    parts = list(Path(path).parts)
    for idx, part in enumerate(parts):
        if part.lower() in LIBRARY_ROOT_NAMES:
            return part, parts[idx + 1:]
    return "", parts
|
||||
|
||||
|
||||
def identity_slug(title: str) -> str:
    """Return a normalised, space-separated slug used to compare titles.

    The title is NFKC-normalised and lower-cased, then every run of
    characters that is not a letter or digit (including underscores) is
    collapsed to a single space and the result is stripped.

    For ASCII titles the output is the same as an ``[^a-z0-9]`` filter.
    Unicode letters and digits are kept, so accented titles stay intact and
    titles written entirely in a non-Latin script no longer collapse to the
    same empty slug. NFKC makes composed and decomposed spellings of the
    same title produce one slug.
    """
    import unicodedata

    normalized = unicodedata.normalize("NFKC", title).lower()
    # ``\W`` is Unicode-aware for str patterns; ``_`` is added because it
    # counts as a word character but should act as a separator here.
    return re.sub(r"[\W_]+", " ", normalized).strip()
|
||||
|
||||
|
||||
def clean_collection_title(name: str) -> tuple[str, int | None]:
    """Derive a display title and release year from a folder or file name.

    The year is taken from a parenthesised year (``YEAR_RE``, e.g.
    ``"(2017)"``) when one is present, and otherwise from the first
    free-standing 19xx/20xx number (``ANY_YEAR_RE``). Preferring the
    parenthesised form keeps titles that themselves contain a year-like
    number, such as ``"Blade Runner 2049 (2017)"``, from reporting the
    number in the title as the release year.

    Returns ``(title, year)`` where ``title`` is ``clean_title(name)`` and
    ``year`` is ``None`` when no year is found.
    """
    year_match = YEAR_RE.search(name) or ANY_YEAR_RE.search(name)
    year = int(year_match.group(1)) if year_match else None
    return clean_title(name), year
|
||||
|
||||
|
||||
def merge_key(kind: str, title: str, year: int | None = None) -> str:
    """Build the grouping key for a title.

    Movies are keyed as ``movie::<slug>::<year>`` (with an empty year part
    when ``year`` is falsy); every other kind is keyed as ``tv::<slug>``
    and ignores ``year``.
    """
    slug = identity_slug(title)
    if kind != "movie":
        return f"tv::{slug}"
    year_part = year or ""
    return f"movie::{slug}::{year_part}"
|
||||
|
||||
|
||||
def file_version(item: dict) -> dict:
    """Summarise one media file as a "version" entry.

    Release tokens are collected from the parent folder name and the file
    stem using ``VERSION_RE``. Dots and underscores inside a token become
    spaces, whitespace is collapsed, and case-insensitive duplicates are
    dropped while keeping first-seen order.

    Returns a dict with ``path``, ``name``, ``drive``, ``size`` (0 when
    missing or falsy), ``quality`` (the first resolution tag, or ``""``)
    and ``tags`` (at most eight).
    """
    media_path = Path(item.get("path", ""))
    searchable = " ".join(filter(None, (media_path.parent.name, media_path.stem)))

    tags: list[str] = []
    seen: set[str] = set()
    for hit in VERSION_RE.finditer(searchable):
        spaced = hit.group(1).replace(".", " ").replace("_", " ")
        label = re.sub(r"\s+", " ", spaced).strip()
        folded = label.lower()
        if folded in seen:
            continue
        seen.add(folded)
        tags.append(label)

    resolutions = {"2160p", "1080p", "720p", "480p"}
    quality = ""
    for label in tags:
        if label.lower() in resolutions:
            quality = label
            break

    return {
        "path": item.get("path"),
        "name": item.get("name"),
        "drive": item.get("drive"),
        "size": item.get("size") or 0,
        "quality": quality,
        "tags": tags[:8],
    }
|
||||
|
||||
|
||||
def is_extra_media(path: Path, library_root: Path, kind: str, app: dict) -> bool:
    """Tell whether ``path`` is bonus material rather than main content.

    Two checks are applied, both on lower-cased text with ``_`` and ``.``
    replaced by spaces:

    * for movie libraries only, any folder below the first folder under
      ``library_root`` whose name is in ``EXTRA_FOLDER_NAMES``;
    * for every kind, any non-empty entry of ``app["extra_keywords"]``
      occurring as a substring of the file name.

    When ``path`` is not located under ``library_root`` the full path is
    used for the folder check.
    """
    def _normalise(text: str) -> str:
        return text.lower().replace("_", " ").replace(".", " ")

    try:
        relative = path.relative_to(library_root)
    except ValueError:
        relative = path

    if kind == "movie":
        folders = [_normalise(part) for part in relative.parts[:-1]]
        # folders[0] is the title folder itself, so only deeper ones count.
        for folder in folders[1:]:
            if folder in EXTRA_FOLDER_NAMES:
                return True

    file_label = _normalise(path.name)
    for keyword in app.get("extra_keywords", []):
        if keyword and keyword.lower() in file_label:
            return True
    return False
|
||||
|
||||
|
||||
def item_identity(item: dict) -> dict:
    """Work out which movie or show a scanned item belongs to.

    The kind comes from ``item["library"]`` when set, otherwise it is
    inferred from the path. ``parse_media`` is run on the path (or the name
    when the item has no ``path`` key).

    For TV items with at least one component below the library root, the
    title is the cleaned first component, and the season falls back to a
    "Season N" component when the parsed season is missing or zero. The
    result has ``kind``, ``root``, ``title``, ``key``, ``season`` and
    ``episode``.

    Everything else is described as a movie: title and year come from the
    first component below the root (or the parsed title when there is
    none), with the parsed year as fallback. The result has ``kind``,
    ``root``, ``title``, ``year``, ``slug`` and ``key``.
    """
    raw_path = item.get("path", "")
    root, rel = split_library_path(raw_path)
    kind = item.get("library") or infer_library_kind(raw_path)
    parsed = parse_media(item.get("path", item.get("name", "")))

    if kind == "tv" and rel:
        title = clean_title(rel[0])
        season = parsed.get("season")
        for part in rel:
            if season:
                # A usable season is already known; folder names cannot override it.
                break
            folder_match = SEASON_FOLDER_RE.search(part)
            if folder_match:
                season = int(folder_match.group(1))
        return {
            "kind": "tv",
            "root": root,
            "title": title,
            "key": merge_key("tv", title),
            "season": season,
            "episode": parsed.get("episode"),
        }

    source_name = rel[0] if rel else parsed["title"]
    title, folder_year = clean_collection_title(source_name)
    year = folder_year or parsed.get("year")
    return {
        "kind": "movie",
        "root": root,
        "title": title,
        "year": year,
        "slug": identity_slug(title),
        "key": merge_key("movie", title, year),
    }
|
||||
|
||||
|
||||
def normalize_library(library: dict) -> dict:
    """Fill in derived fields of a library snapshot, in place.

    Every item gets a ``library`` kind (kept when already set, otherwise
    inferred from its path). ``library["counts"]`` is rebuilt with the
    number of movie items, TV items and all items. ``collections`` is
    computed from the items only when the snapshot has none.

    Returns the same ``library`` dict that was passed in.
    """
    items = library.get("items", [])
    tally = Counter()
    for entry in items:
        resolved = entry.get("library") or infer_library_kind(entry.get("path", ""))
        entry["library"] = resolved
        if resolved in {"movie", "tv"}:
            tally[resolved] += 1

    library["counts"] = {
        "movies": tally["movie"],
        "tv": tally["tv"],
        "total": len(items),
    }

    # Not setdefault(): the collections must only be built when missing.
    if "collections" not in library:
        library["collections"] = build_collections({}, items)
    return library
|
||||
|
||||
|
||||
def build_collections(config: dict, items: list[dict], enrich: bool = False) -> dict:
    """Group scanned items into movies and series, optionally with metadata.

    Args:
        config: Application configuration. Only read when ``enrich`` is
            true; ``config["app"]["metadata_parallelism"]`` (default 8,
            clamped to 1..12) sets the number of lookup threads.
        items: Scanned media items as produced by the library scan.
        enrich: When true and ``config`` is non-empty, ``movie_metadata`` /
            ``series_metadata`` are called for every group.

    Returns:
        ``{"movies": [...], "series": [...]}``, both sorted by lower-cased
        title. Each series has ``seasons`` as a list sorted by season
        number, and each season has ``episodes`` as a list sorted by
        episode number. Episodes that appear only in metadata are added
        with status ``"upcoming"`` (air date after today) or ``"missing"``.

    Metadata lookups are best-effort: a failing lookup is logged and the
    filename-derived metadata is kept for that title.
    """
    import logging

    movies: dict[str, dict] = {}
    series: dict[str, dict] = {}
    for item in items:
        identity = item_identity(item)
        if identity["kind"] == "tv":
            show = series.setdefault(identity["key"], {
                "key": identity["key"],
                "title": identity["title"],
                "library": "tv",
                "files": [],
                "seasons": {},
                "metadata": {"title": identity["title"], "source": "filename", "seasons": {}},
            })
            show["files"].append(item)
            # Unknown season/episode numbers are filed under 0.
            season_no = identity.get("season") or 0
            episode_no = identity.get("episode") or 0
            season = show["seasons"].setdefault(str(season_no), {"season": season_no, "episodes": {}})
            episode = season["episodes"].setdefault(str(episode_no), {
                "season": season_no,
                "episode": episode_no,
                # .get(): items without a name must not abort the whole build.
                "title": f"S{season_no:02d}E{episode_no:02d}" if season_no and episode_no else item.get("name", ""),
                "files": [],
                "status": "present",
            })
            episode["files"].append(item)
        else:
            key = identity["key"]
            if not identity.get("year"):
                # A file without a year joins an existing entry with the same slug.
                existing_key = next(
                    (
                        candidate_key
                        for candidate_key, candidate in movies.items()
                        if candidate.get("slug") == identity["slug"]
                    ),
                    None,
                )
                if existing_key:
                    key = existing_key
            elif key not in movies:
                # First dated file for this title: adopt an earlier undated
                # entry (if any) under the dated key.
                no_year_key = merge_key("movie", identity["title"], None)
                if no_year_key in movies:
                    movies[key] = movies.pop(no_year_key)
                    movies[key]["key"] = key
            movie = movies.setdefault(key, {
                "key": key,
                "title": identity["title"],
                "year": identity.get("year"),
                "slug": identity.get("slug"),
                "library": "movie",
                "files": [],
                "versions": [],
                "metadata": {"title": identity["title"], "source": "filename"},
            })
            movie["files"].append(item)
            movie["versions"].append(file_version(item))
            if not movie.get("year") and identity.get("year"):
                movie["year"] = identity.get("year")

    if enrich and config:
        workers = int(config.get("app", {}).get("metadata_parallelism", 8))
        tasks = {}
        with ThreadPoolExecutor(max_workers=max(1, min(workers, 12))) as executor:
            for movie in movies.values():
                future = executor.submit(movie_metadata, config, movie["title"], movie.get("year"))
                tasks[future] = movie
            for show in series.values():
                present_seasons = {int(season) for season in show["seasons"] if int(season) > 0}
                future = executor.submit(series_metadata, config, show["title"], present_seasons)
                tasks[future] = show
            for future in as_completed(tasks):
                target = tasks[future]
                try:
                    result = future.result()
                except Exception:
                    # Best-effort enrichment: keep filename metadata, but
                    # leave a trace instead of failing silently.
                    logging.getLogger(__name__).warning(
                        "Metadata lookup failed for %r", target.get("title"), exc_info=True
                    )
                    continue
                if result is not None:
                    target["metadata"] = result

    today = time.strftime("%Y-%m-%d")
    for show in series.values():
        season_metadata = (show.get("metadata") or {}).get("seasons") or {}
        for season_no, season_meta in season_metadata.items():
            # Local seasons are keyed by str(int); normalise the metadata key
            # the same way so 1, "1" and "01" all land on the same season.
            season_key = str(int(season_no))
            season = show["seasons"].setdefault(season_key, {"season": int(season_no), "episodes": {}})
            for meta_episode in season_meta.get("episodes", []):
                key = str(meta_episode.get("episode") or 0)
                existing = season["episodes"].get(key)
                if existing:
                    existing.update({
                        "title": meta_episode.get("title") or existing["title"],
                        "air_date": meta_episode.get("air_date"),
                        "overview": meta_episode.get("overview"),
                        "still": meta_episode.get("still"),
                    })
                else:
                    air_date = meta_episode.get("air_date")
                    season["episodes"][key] = {
                        **meta_episode,
                        "files": [],
                        # String comparison against today's "YYYY-MM-DD".
                        "status": "upcoming" if air_date and air_date > today else "missing",
                    }
        for season in show["seasons"].values():
            season["episodes"] = sorted(season["episodes"].values(), key=lambda ep: ep.get("episode") or 0)
        show["seasons"] = sorted(show["seasons"].values(), key=lambda season: season["season"])

    return {
        "movies": sorted(movies.values(), key=lambda movie: movie["title"].lower()),
        "series": sorted(series.values(), key=lambda show: show["title"].lower()),
    }
|
||||
|
||||
|
||||
def library_snapshot(config: dict) -> dict:
    """Scan the configured drives and build a library snapshot.

    For every drive in ``config["drives"]`` whose path exists, each library
    root folder found directly under it is walked. Files are counted by
    extension, and media files (by ``app["media_extensions"]``, compared
    case-insensitively) that are not extras become items.

    The scan stops once ``app["library_scan_max_files"]`` files (default
    20000) have been stat'ed or ``app["library_scan_timeout_seconds"]``
    (default 8) have elapsed; in that case ``truncated`` is true and no
    further directories are opened.

    ``has_subtitles`` is true when the media file's directory contains a
    file with a configured subtitle extension named either
    ``<stem><ext>`` or ``<stem>.<anything><ext>`` (for example a language
    tag as in ``Movie.en.srt``), compared case-insensitively.

    Metadata enrichment is requested only when
    ``config["metadata"]["tmdb_enabled"]`` is truthy (default true) and the
    item count does not exceed
    ``app["library_metadata_enrich_max_items"]`` (default 500).

    Returns the snapshot dict after ``normalize_library`` has processed it.
    """
    items = []
    extensions = Counter()
    ignored_dirs = {"$RECYCLE.BIN", "System Volume Information", ".Trash-1000"}
    app = config["app"]
    max_files = int(app.get("library_scan_max_files", 20000))
    deadline = time.monotonic() + int(app.get("library_scan_timeout_seconds", 8))
    # Resolved once instead of per file; lower-cased so config casing is irrelevant.
    media_extensions = {ext.lower() for ext in app.get("media_extensions", [])}
    subtitle_extensions = tuple(ext.lower() for ext in app.get("subtitle_extensions", []) if ext)
    scanned = 0
    truncated = False

    def out_of_budget() -> bool:
        return scanned >= max_files or time.monotonic() >= deadline

    for drive in config.get("drives", []):
        if truncated or out_of_budget():
            truncated = True
            break
        root = Path(drive["path"])
        if not root.exists():
            continue
        for library_root in library_roots(root):
            if truncated:
                # Budget exhausted in an earlier root: do not open more folders.
                break
            kind = library_kind(library_root)
            for current, dirs, files in os.walk(library_root, onerror=lambda error: None):
                if truncated or out_of_budget():
                    truncated = True
                    break
                # In-place edit so os.walk does not descend into ignored folders.
                dirs[:] = [name for name in dirs if name not in ignored_dirs]
                lower_files = {name.lower() for name in files}
                subtitle_files = [name for name in lower_files if name.endswith(subtitle_extensions)]
                for filename in files:
                    if out_of_budget():
                        truncated = True
                        break
                    path = Path(current) / filename
                    try:
                        stat = path.stat()
                    except OSError:
                        continue
                    scanned += 1
                    suffix = path.suffix.lower()
                    extensions[suffix or "none"] += 1
                    if suffix not in media_extensions:
                        continue
                    if is_extra_media(path, library_root, kind, app):
                        continue
                    stem = path.stem.lower()
                    exact_names = {f"{stem}{ext}" for ext in subtitle_extensions}
                    tagged_prefix = f"{stem}."
                    items.append({
                        "path": str(path),
                        "name": path.name,
                        "drive": drive["id"],
                        "library": kind,
                        "root": library_root.name,
                        "size": stat.st_size,
                        "modified": stat.st_mtime,
                        "has_subtitles": any(
                            name in exact_names or name.startswith(tagged_prefix)
                            for name in subtitle_files
                        ),
                    })

    enrich_limit = int(app.get("library_metadata_enrich_max_items", 500))
    should_enrich = bool(config.get("metadata", {}).get("tmdb_enabled", True)) and len(items) <= enrich_limit
    return normalize_library({
        "drives": drive_stats(config),
        "items": sorted(items, key=lambda item: item["modified"], reverse=True),
        "collections": build_collections(config, items, enrich=should_enrich),
        "extensions": dict(extensions.most_common()),
        "scanned_files": scanned,
        "truncated": truncated,
        "metadata_enriched": should_enrich,
    })
|
||||
Reference in New Issue
Block a user