Initial commit

This commit is contained in:
scoped
2026-05-15 02:41:52 +00:00
commit e2de5f705a
73 changed files with 9965 additions and 0 deletions

15
dist/sortarr/backend/Dockerfile vendored Normal file
View File

@@ -0,0 +1,15 @@
# Backend image for Sortarr, built on the slim Python 3.12 base.
FROM python:3.12-slim
# Do not write .pyc files, and keep stdout/stderr unbuffered so log lines
# reach the container log stream immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
# ffmpeg is installed because the backend shells out to `ffprobe`;
# the apt package lists are removed afterwards to keep the layer small.
RUN apt-get update \
&& apt-get install -y --no-install-recommends ffmpeg \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Application package plus the default configuration directory.
COPY sortarr /app/sortarr
COPY default-config /app/default-config
# 8099 is the default API port used by sortarr.app when SORTARR_API_PORT is unset.
EXPOSE 8099
CMD ["python", "-m", "sortarr.app"]

View File

@@ -0,0 +1,90 @@
[app]
name = "Sortarr"
dry_run = true
log_level = "INFO"
scan_interval_seconds = 20
settle_seconds = 90
stable_checks = 2
incomplete_suffixes = [".part", ".partial", ".!qB", ".tmp", ".crdownload"]
media_extensions = [".mkv", ".mp4", ".avi", ".mov", ".m4v", ".wmv", ".ts"]
subtitle_extensions = [".srt", ".ass", ".ssa", ".vtt", ".sub"]
extra_keywords = ["sample", "trailer", "behind the scenes", "featurette", "deleted scene"]
library_scan_max_files = 20000
library_scan_timeout_seconds = 8
cache_max_bytes = 21474836480
auto_move_min_confidence = 90
review_min_confidence = 60
organization_metadata_budget_seconds = 25
organization_metadata_timeout_seconds = 3
metadata_parallelism = 8
[paths]
downloads = "/downloads"
data = "/data"
logs = "/logs"
cache = "/data/cache"
[[drives]]
id = "drive1"
name = "Media Drive 1"
path = "/media/drive1"
min_free_gb = 20
[[drives]]
id = "drive2"
name = "Media Drive 2"
path = "/media/drive2"
min_free_gb = 20
[[drives]]
id = "drive3"
name = "Media Drive 3"
path = "/media/drive3"
min_free_gb = 20
[[drives]]
id = "drive4"
name = "Media Drive 4"
path = "/media/drive4"
min_free_gb = 20
[library]
movie_folder = "Movies/{title} ({year})"
series_folder = "Shows/{title}/Season {season:02d}"
movie_file = "{title} ({year}){quality}{ext}"
episode_file = "{title} - S{season:02d}E{episode:02d}{multi_episode} - {episode_title}{quality}{ext}"
subtitle_file = "{basename}{language}{ext}"
unknown_folder = "Unsorted/{title}"
collision = "keep-both" # keep-both, skip, replace
duplicate = "skip" # skip, keep-both
permissions_mode = "664"
directory_mode = "775"
[metadata]
write_nfo = true
provider_order = ["filename"]
prefer_existing_nfo = true
tmdb_api_key = ""
tmdb_bearer_token = ""
tmdb_language = "en-US"
tmdb_image_base = "https://image.tmdb.org/t/p/w342"
tmdb_enabled = true
[[release_providers]]
id = "tmdb-rss"
name = "TMDb RSS"
enabled = false
type = "rss"
url = "https://www.themoviedb.org/rss/movie/upcoming"
[[release_providers]]
id = "tvmaze-premieres"
name = "TVMaze Premieres"
enabled = false
type = "json"
url = "https://api.tvmaze.com/schedule"
[theme]
default = "slate"
allow_custom_css = true
custom_css_path = "/config/custom-theme.css"

View File

@@ -0,0 +1,2 @@
__all__ = ["config", "organizer", "server"]

356
dist/sortarr/backend/sortarr/app.py vendored Normal file
View File

@@ -0,0 +1,356 @@
from __future__ import annotations
import json
import os
from http import HTTPStatus
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import urlparse
from urllib.parse import parse_qs, unquote
from .config import load_config, public_config
from .downloads import downloads_snapshot
from .library import library_snapshot, normalize_library
from .logging_setup import configure_logging
from .media_probe import edit_track, media_probe
from .metadata import test_tmdb, search_tmdb, identify_item
from .organizer import execute_bundle_plan
from .releases import fetch_releases
from .scanner import Scanner
from .store import JsonStore
from .storage import drive_stats
from .tools import run_next_transcode, subtitle_audit, transcode_plan
# Whitelist of settings that may be changed at runtime.
# Layout: section name -> {setting key -> caster}. apply_settings() walks this
# mapping and hands each submitted value to coerce_value() with the caster
# listed here; keys not listed are ignored by that loop.
SETTINGS_SCHEMA = {
# [app] section: scanner timing, file classification and confidence limits.
"app": {
"name": str,
"dry_run": bool,
"log_level": str,
"scan_interval_seconds": int,
"settle_seconds": int,
"stable_checks": int,
"incomplete_suffixes": list,
"media_extensions": list,
"subtitle_extensions": list,
"extra_keywords": list,
"library_scan_max_files": int,
"library_scan_timeout_seconds": int,
"cache_max_bytes": int,
"auto_move_min_confidence": int,
"review_min_confidence": int,
"organization_metadata_budget_seconds": int,
"organization_metadata_timeout_seconds": int,
"metadata_parallelism": int,
},
# [paths] section: filesystem locations.
"paths": {
"downloads": str,
"data": str,
"logs": str,
"cache": str,
},
# [library] section: naming templates and collision/duplicate policy.
"library": {
"movie_folder": str,
"series_folder": str,
"movie_file": str,
"episode_file": str,
"subtitle_file": str,
"unknown_folder": str,
"collision": str,
"duplicate": str,
"permissions_mode": str,
"directory_mode": str,
},
# [metadata] section: NFO handling and TMDb credentials/options.
"metadata": {
"write_nfo": bool,
"provider_order": list,
"prefer_existing_nfo": bool,
"tmdb_api_key": str,
"tmdb_bearer_token": str,
"tmdb_language": str,
"tmdb_image_base": str,
"tmdb_enabled": bool,
},
# [theme] section: UI theme selection and custom CSS.
"theme": {
"default": str,
"allow_custom_css": bool,
"custom_css_path": str,
},
}
def deep_merge(base: dict, override: dict) -> dict:
    """Fold ``override`` into ``base`` in place and return ``base``.

    When both sides hold a dict under the same key the merge recurses;
    in every other case the value from ``override`` replaces the one in ``base``.
    """
    for name in override:
        incoming = override[name]
        current = base.get(name)
        if isinstance(incoming, dict) and isinstance(current, dict):
            deep_merge(current, incoming)
            continue
        base[name] = incoming
    return base
def coerce_value(value, caster):
    """Convert a submitted setting ``value`` to the type named by ``caster``.

    * ``bool``: strings are parsed textually ("1", "true", "yes", "on" are
      true, anything else is false) so that ``"false"`` does not become
      ``True``; non-string values fall back to ``bool(value)``.
    * ``int``: ``int(value)``; raises ``ValueError``/``TypeError`` on bad input.
    * ``list``: a list is normalised to stripped, non-empty strings; any other
      value is stringified and split on commas.
    * anything else: ``caster(value)``.
    """
    if caster is bool:
        if isinstance(value, str):
            # bool("false") is True, so string input needs explicit parsing.
            return value.strip().lower() in {"1", "true", "yes", "on"}
        return bool(value)
    if caster is int:
        return int(value)
    if caster is list:
        if isinstance(value, list):
            candidates = (str(item).strip() for item in value)
        else:
            candidates = (item.strip() for item in str(value).split(","))
        return [item for item in candidates if item]
    return caster(value)
def apply_settings(config: dict, settings: dict) -> dict:
    """Apply whitelisted ``settings`` onto ``config`` in place.

    Returns a dict containing only what was actually applied, in the same
    nested shape, so the caller can persist it.  A flat payload that contains
    any ``[app]`` key is treated as shorthand for ``{"app": payload}``.
    ``drives`` and ``release_providers`` lists replace the configured lists
    wholesale; non-dict entries in them are dropped.
    """
    if any(key in SETTINGS_SCHEMA["app"] for key in settings):
        settings = {"app": settings}

    applied = {}
    for section, fields in SETTINGS_SCHEMA.items():
        incoming = settings.get(section)
        if not isinstance(incoming, dict):
            continue
        target = config.setdefault(section, {})
        accepted = {}
        for key, caster in fields.items():
            if key in incoming:
                target[key] = coerce_value(incoming[key], caster)
                accepted[key] = target[key]
        if accepted:
            applied[section] = accepted

    requested_drives = settings.get("drives")
    if isinstance(requested_drives, list):
        known_drives = config.get("drives", [])
        drives = []
        for idx, drive in enumerate(requested_drives):
            if not isinstance(drive, dict):
                continue
            # Missing fields fall back to the drive currently configured at the
            # same position, then to generated defaults.
            existing = known_drives[idx] if idx < len(known_drives) else {}
            ordinal = idx + 1
            drives.append({
                "id": str(drive.get("id", existing.get("id", f"drive{ordinal}"))),
                "name": str(drive.get("name", existing.get("name", f"Media Drive {ordinal}"))),
                "path": str(drive.get("path", existing.get("path", ""))),
                "min_free_gb": int(drive.get("min_free_gb", existing.get("min_free_gb", 20))),
            })
        config["drives"] = drives
        applied["drives"] = drives

    requested_providers = settings.get("release_providers")
    if isinstance(requested_providers, list):
        providers = [
            {
                "id": str(provider.get("id", "")),
                "name": str(provider.get("name", "")),
                "enabled": bool(provider.get("enabled", False)),
                "type": str(provider.get("type", "rss")),
                "url": str(provider.get("url", "")),
            }
            for provider in requested_providers
            if isinstance(provider, dict)
        ]
        config["release_providers"] = providers
        applied["release_providers"] = providers

    return applied
# Module-level bootstrap, executed once at import time.
# Load the merged configuration and set up logging from it.
CONFIG = load_config()
configure_logging(CONFIG["paths"]["logs"], CONFIG["app"].get("log_level", "INFO"))
# Open the JSON store under the data directory and re-apply any settings it
# holds on top of the file/environment configuration.
STORE = JsonStore(CONFIG["paths"]["data"])
apply_settings(CONFIG, STORE.snapshot().get("settings", {}))
# The scanner shares the same CONFIG and STORE objects as the HTTP handler.
SCANNER = Scanner(CONFIG, STORE)
class Handler(BaseHTTPRequestHandler):
    """Serve the Sortarr JSON API.

    GET routes return read-only snapshots; POST routes trigger scans, act on
    the organizer queue, edit media tracks and persist settings.  Any exception
    raised while handling a request is reported to the client as a JSON body
    with status 500.
    """

    server_version = "Sortarr/0.1"

    def log_message(self, fmt: str, *args) -> None:
        """Suppress the base class's per-request log line."""
        return

    def send_json(self, payload, status=HTTPStatus.OK) -> None:
        """Send ``payload`` as indented JSON with ``status`` and an open CORS header."""
        body = json.dumps(payload, indent=2).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _read_json_body(self):
        """Read ``Content-Length`` bytes from the request and decode them as JSON.

        A missing, empty or zero ``Content-Length`` yields ``{}``.  Invalid JSON
        raises ``json.JSONDecodeError``, which the callers' blanket handler
        turns into a 500 response.
        """
        length = int(self.headers.get("Content-Length", "0") or "0")
        body = self.rfile.read(length).decode() if length else "{}"
        return json.loads(body)

    def do_OPTIONS(self) -> None:
        """Answer CORS preflight requests."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET,POST,OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def do_GET(self) -> None:
        """Dispatch GET requests by exact path match."""
        parsed_url = urlparse(self.path)
        path = parsed_url.path
        try:
            if path == "/api/health":
                self.send_json({"ok": True})
            elif path == "/api/config":
                self.send_json(public_config(CONFIG))
            elif path == "/api/dashboard":
                snap = STORE.snapshot()
                cached_library = snap.get("library") or {
                    "drives": drive_stats(CONFIG),
                    "items": [],
                    "counts": {"movies": 0, "tv": 0, "total": 0},
                    "extensions": {},
                    "scanned_files": 0,
                    "truncated": False,
                    "cached": False,
                }
                cached_library = normalize_library(cached_library)
                # The per-file item list is dropped from the dashboard payload.
                cached_library.pop("items", None)
                public_state = {
                    "events": snap.get("events", [])[:200],
                    "organizer": snap.get("organizer", {"queue": [], "updated_at": None}),
                    "settings": snap.get("settings", {}),
                    "updated_at": snap.get("updated_at"),
                }
                self.send_json({
                    "state": public_state,
                    "library": cached_library,
                    "dry_run": CONFIG["app"].get("dry_run"),
                })
            elif path == "/api/downloads":
                self.send_json({"downloads": downloads_snapshot(CONFIG, STORE.snapshot())})
            elif path == "/api/releases":
                self.send_json({"releases": fetch_releases(CONFIG, STORE.snapshot().get("library"))})
            elif path == "/api/media/probe":
                params = parse_qs(parsed_url.query)
                target = unquote((params.get("path") or [""])[0])
                self.send_json({"media": media_probe(CONFIG, target)})
            elif path == "/api/metadata/search":
                params = parse_qs(parsed_url.query)
                query = unquote((params.get("query") or [""])[0])
                kind = unquote((params.get("type") or ["movie"])[0])
                self.send_json({"results": search_tmdb(CONFIG, kind, query)})
            elif path == "/api/tools/subtitles":
                self.send_json({"audit": subtitle_audit(CONFIG, STORE.snapshot().get("library"))})
            elif path == "/api/tools/transcoder":
                self.send_json({"transcoder": transcode_plan(CONFIG, STORE.snapshot().get("library"))})
            elif path == "/api/theme/custom.css":
                custom = CONFIG.get("theme", {}).get("custom_css_path")
                if custom and CONFIG.get("theme", {}).get("allow_custom_css", True) and os.path.exists(custom):
                    # Context manager so the descriptor is closed promptly on
                    # every request instead of waiting for garbage collection.
                    with open(custom, "rb") as handle:
                        body = handle.read()
                    self.send_response(200)
                    self.send_header("Content-Type", "text/css")
                    self.send_header("Content-Length", str(len(body)))
                    self.end_headers()
                    self.wfile.write(body)
                else:
                    self.send_response(404)
                    self.end_headers()
            else:
                self.send_json({"error": "not found"}, HTTPStatus.NOT_FOUND)
        except Exception as exc:
            self.send_json({"error": str(exc)}, HTTPStatus.INTERNAL_SERVER_ERROR)

    def do_POST(self) -> None:
        """Dispatch POST requests by exact path match."""
        path = urlparse(self.path).path
        try:
            if path == "/api/scan":
                started = SCANNER.request_scan()
                snap = STORE.snapshot()
                self.send_json({
                    "started": started,
                    "status": "started" if started else "already-running",
                    "queue": snap.get("organizer", {}).get("queue", []),
                }, HTTPStatus.ACCEPTED)
            elif path == "/api/organizer/approve":
                payload = self._read_json_body()
                plan_id = payload.get("id")
                snap = STORE.snapshot()
                queue = snap.get("organizer", {}).get("queue", [])
                plan = next((item for item in queue if item.get("id") == plan_id), None)
                if not plan:
                    self.send_json({"error": "plan not found"}, HTTPStatus.NOT_FOUND)
                    return
                result = execute_bundle_plan(CONFIG, plan, force=True)
                # Replace the approved plan in the queue with its execution result.
                updated = [result if item.get("id") == plan_id else item for item in queue]
                STORE.set_organizer_queue(updated)
                STORE.add_event(
                    "info",
                    f"approved organizer plan: {result.get('result')}",
                    path=result.get("source"),
                    confidence=result.get("confidence"),
                )
                self.send_json({"plan": result})
            elif path == "/api/organizer/skip":
                payload = self._read_json_body()
                plan_id = payload.get("id")
                snap = STORE.snapshot()
                queue = snap.get("organizer", {}).get("queue", [])
                updated = [
                    {**item, "status": "skipped", "result": "skipped"} if item.get("id") == plan_id else item
                    for item in queue
                ]
                STORE.set_organizer_queue(updated)
                self.send_json({"ok": True})
            elif path == "/api/library/identify":
                payload = self._read_json_body()
                key = payload.get("key")
                tmdb_id = payload.get("tmdb_id")
                kind = payload.get("type")
                snap = STORE.snapshot()
                library = snap.get("library", {})
                collections = library.get("collections", {"movies": [], "series": []})
                found_item = None
                if kind == "movie":
                    for item in collections["movies"]:
                        if item["key"] == key:
                            found_item = identify_item(CONFIG, item, tmdb_id, kind)
                            break
                else:
                    # Every non-"movie" type is looked up among series and identified as "tv".
                    for item in collections["series"]:
                        if item["key"] == key:
                            found_item = identify_item(CONFIG, item, tmdb_id, "tv")
                            break
                if found_item:
                    STORE.set_library(library)
                    self.send_json({"ok": True, "item": found_item})
                else:
                    self.send_json({"error": "item not found"}, HTTPStatus.NOT_FOUND)
            elif path == "/api/library/scan":
                library = library_snapshot(CONFIG)
                STORE.set_library(library)
                self.send_json({"library": library})
            elif path == "/api/tools/transcoder/run-next":
                result = run_next_transcode(CONFIG, STORE.snapshot().get("library"))
                STORE.add_event("info", f"transcoder: {result.get('status')}")
                self.send_json({"transcoder": result})
            elif path == "/api/metadata/tmdb/test":
                self.send_json({"tmdb": test_tmdb(CONFIG)})
            elif path == "/api/media/tracks":
                payload = self._read_json_body()
                result = edit_track(
                    CONFIG,
                    payload.get("path", ""),
                    payload.get("action", ""),
                    int(payload.get("stream_index", -1)),
                )
                STORE.add_event("info", f"track edit: {result.get('status')}", path=payload.get("path", ""))
                self.send_json({"media": result})
            elif path == "/api/settings":
                updates = self._read_json_body()
                applied = apply_settings(CONFIG, updates)
                snap = STORE.snapshot()
                settings = snap.get("settings", {})
                # Only the values that were actually applied are persisted.
                deep_merge(settings, applied)
                STORE.state["settings"] = settings
                STORE.save()
                self.send_json({"settings": applied, "config": public_config(CONFIG)})
            else:
                self.send_json({"error": "not found"}, HTTPStatus.NOT_FOUND)
        except Exception as exc:
            self.send_json({"error": str(exc)}, HTTPStatus.INTERNAL_SERVER_ERROR)
def main() -> None:
    """Start the background scanner, then serve the HTTP API until the process ends.

    The bind address comes from ``SORTARR_HOST`` (default ``0.0.0.0``) and
    ``SORTARR_API_PORT`` (default ``8099``).
    """
    SCANNER.start()
    bind_address = (
        os.getenv("SORTARR_HOST", "0.0.0.0"),
        int(os.getenv("SORTARR_API_PORT", "8099")),
    )
    httpd = ThreadingHTTPServer(bind_address, Handler)
    httpd.serve_forever()
if __name__ == "__main__":
main()

75
dist/sortarr/backend/sortarr/cache.py vendored Normal file
View File

@@ -0,0 +1,75 @@
from __future__ import annotations
import hashlib
import json
import os
import time
from pathlib import Path
from typing import Any
def cache_root(config: dict) -> Path:
    """Return the cache directory, creating it if necessary.

    Uses ``paths.cache`` when it is set to a non-empty value, otherwise
    ``<paths.data>/cache``.
    """
    configured = config.get("paths", {}).get("cache")
    if configured:
        root = Path(configured)
    else:
        root = Path(config["paths"]["data"]) / "cache"
    root.mkdir(parents=True, exist_ok=True)
    return root
def cache_path(config: dict, namespace: str, key: str) -> Path:
    """Return the cache file for ``key`` inside ``namespace``.

    The file name is the SHA-256 hex digest of ``key`` plus ``.json``; the
    namespace directory is created on demand.
    """
    namespace_dir = cache_root(config) / namespace
    file_name = hashlib.sha256(key.encode()).hexdigest() + ".json"
    target = namespace_dir / file_name
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
def get_json(config: dict, namespace: str, key: str, ttl_seconds: int | None = None) -> Any | None:
    """Return the cached JSON value for ``key``, or ``None`` on a miss.

    A miss is any of: the entry does not exist, it is older than
    ``ttl_seconds`` (when given, judged by file modification time), or it
    cannot be read or decoded.

    The entry is accessed directly rather than checked with ``exists()``
    first: another thread's ``prune`` may delete the file between the check
    and the ``stat``/read, which previously surfaced as an uncaught
    ``FileNotFoundError``.
    """
    path = cache_path(config, namespace, key)
    try:
        if ttl_seconds is not None and time.time() - path.stat().st_mtime > ttl_seconds:
            return None
        return json.loads(path.read_text())
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError from a corrupt entry.
        return None
def set_json(config: dict, namespace: str, key: str, value: Any) -> None:
    """Store ``value`` as JSON under ``key`` and then prune the cache.

    The payload is written to a sibling ``.tmp`` file first and moved over the
    final name afterwards.
    """
    destination = cache_path(config, namespace, key)
    serialized = json.dumps(value, sort_keys=True)
    staging = destination.with_suffix(".tmp")
    staging.write_text(serialized)
    staging.replace(destination)
    prune(config)
def remove_json(config: dict, namespace: str, key: str) -> None:
    """Delete the cache entry for ``key``; a missing entry is not an error."""
    cache_path(config, namespace, key).unlink(missing_ok=True)
def prune(config: dict) -> None:
    """Delete the oldest cache files until the cache fits ``app.cache_max_bytes``.

    Files are ordered by modification time (oldest first).  Files that cannot
    be stat'ed are ignored, and files that cannot be deleted are skipped.
    The limit defaults to 20 GiB.
    """
    root = cache_root(config)
    max_bytes = int(config.get("app", {}).get("cache_max_bytes", 20 * 1024**3))

    entries = []
    for current, _, names in os.walk(root):
        for name in names:
            candidate = Path(current) / name
            try:
                info = candidate.stat()
            except OSError:
                continue
            entries.append((info.st_mtime, info.st_size, candidate))

    total = sum(size for _, size, _ in entries)
    if total <= max_bytes:
        return

    entries.sort()
    for _, size, victim in entries:
        try:
            victim.unlink()
        except OSError:
            continue
        total -= size
        if total <= max_bytes:
            break

67
dist/sortarr/backend/sortarr/config.py vendored Normal file
View File

@@ -0,0 +1,67 @@
from __future__ import annotations
import copy
import os
import tomllib
from pathlib import Path
from typing import Any
def _read_toml(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
with path.open("rb") as handle:
return tomllib.load(handle)
def _merge(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
    """Return a new dict that layers ``override`` on top of ``base``.

    Neither input is modified.  Nested dicts present on both sides are merged
    recursively; any other value from ``override`` replaces the base value.
    """
    merged = copy.deepcopy(base)
    for key, value in override.items():
        existing = merged.get(key)
        if isinstance(value, dict) and isinstance(existing, dict):
            merged[key] = _merge(existing, value)
            continue
        merged[key] = copy.deepcopy(value)
    return merged
def _bool(value: str) -> bool:
    """Interpret an environment-style string as a boolean.

    "1", "true", "yes" and "on" (case-insensitive, surrounding whitespace
    ignored) are true; everything else is false.
    """
    normalized = value.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
def load_config() -> dict[str, Any]:
    """Build the effective configuration and create its working directories.

    Layers, lowest priority first: the default TOML file
    (``SORTARR_DEFAULT_CONFIG``), the user TOML file (``SORTARR_CONFIG``),
    then the environment variables listed in ``env_map``.  Empty environment
    variables are ignored.  ``SORTARR_MIN_FREE_GB`` overrides ``min_free_gb``
    on every configured drive.

    The fallback locations for ``paths.data``, ``paths.logs`` and
    ``paths.cache`` are written back into the returned config.  Previously the
    directories were created at the fallback location but the keys stayed
    absent, so callers indexing ``config["paths"]["logs"]`` raised ``KeyError``.

    Raises ``ValueError`` when an integer environment variable is malformed
    and ``OSError`` when a directory cannot be created.
    """
    default_path = Path(os.getenv("SORTARR_DEFAULT_CONFIG", "/app/default-config/app.toml"))
    user_path = Path(os.getenv("SORTARR_CONFIG", "/config/app.toml"))
    config = _merge(_read_toml(default_path), _read_toml(user_path))
    app = config.setdefault("app", {})
    paths = config.setdefault("paths", {})

    # environment variable -> (section, key, converter)
    env_map = {
        "SORTARR_DRY_RUN": ("app", "dry_run", _bool),
        "SORTARR_LOG_LEVEL": ("app", "log_level", str),
        "SORTARR_SCAN_INTERVAL_SECONDS": ("app", "scan_interval_seconds", int),
        "SORTARR_SETTLE_SECONDS": ("app", "settle_seconds", int),
        "SORTARR_DATA_DIR": ("paths", "data", str),
        "SORTARR_LOG_DIR": ("paths", "logs", str),
        "SORTARR_CACHE_DIR": ("paths", "cache", str),
        "TMDB_API_KEY": ("metadata", "tmdb_api_key", str),
        "TMDB_BEARER_TOKEN": ("metadata", "tmdb_bearer_token", str),
    }
    for env, (section, key, caster) in env_map.items():
        raw = os.getenv(env)
        if raw not in (None, ""):
            config.setdefault(section, {})[key] = caster(raw)

    min_free = os.getenv("SORTARR_MIN_FREE_GB")
    if min_free:
        for drive in config.get("drives", []):
            drive["min_free_gb"] = int(min_free)

    # Record the fallbacks so the directories created below are the same ones
    # the rest of the application reads from the config.
    data_dir = paths.setdefault("data", "/data")
    logs_dir = paths.setdefault("logs", "/logs")
    cache_dir = paths.setdefault("cache", str(Path(data_dir) / "cache"))
    for directory in (data_dir, logs_dir, cache_dir):
        Path(directory).mkdir(parents=True, exist_ok=True)

    # Dry-run stays on unless configuration explicitly says otherwise.
    app.setdefault("dry_run", True)
    return config
def public_config(config: dict[str, Any]) -> dict[str, Any]:
    """Return an independent deep copy of ``config`` for API responses.

    NOTE(review): nothing is redacted here, so ``metadata.tmdb_api_key`` and
    ``metadata.tmdb_bearer_token`` are included in the copy.  Confirm whether
    the API is meant to expose them before relying on the "public" in the name.
    """
    return copy.deepcopy(config)

View File

@@ -0,0 +1,139 @@
from __future__ import annotations
import time
from collections import defaultdict
from pathlib import Path
def empty_snapshot(root: Path, error: str | None = None) -> dict:
    """Return a downloads snapshot with no files, optionally carrying ``error``.

    The shape mirrors a regular snapshot: empty lists, zeroed counters and the
    current time as ``generated_at``.
    """
    counter_names = ("current", "recent", "media", "subtitles", "incomplete")
    snapshot = {"path": str(root), "generated_at": time.time()}
    for list_name in ("current", "bundles", "loose", "recent"):
        snapshot[list_name] = []
    snapshot["counts"] = dict.fromkeys(counter_names, 0)
    snapshot["total_size"] = 0
    snapshot["error"] = error
    return snapshot
def downloads_snapshot(config: dict, state: dict) -> dict:
    """Describe the current contents of the downloads directory.

    Walks ``paths.downloads`` recursively and returns a dict with:

    * ``current``: every file, newest first;
    * ``bundles``: one entry per media file with the subtitles and up to 20
      other files ("sidecars") found alongside it;
    * ``loose``: non-media files that did not end up in any bundle;
    * ``recent``: up to 200 entries from ``state["items"]`` whose source lies
      inside the downloads directory and whose status is ``moved``/``planned``;
    * ``counts``, ``total_size``, and ``error`` (``None`` on success).

    If the directory cannot be created or walked, an empty snapshot carrying
    the error text is returned instead of raising.
    """
    root = Path(config["paths"]["downloads"])
    app = config.get("app", {})
    media_extensions = set(app.get("media_extensions", []))
    subtitle_extensions = set(app.get("subtitle_extensions", []))
    incomplete = set(app.get("incomplete_suffixes", []))

    current = []
    media_files = []
    subtitle_files = []
    total_size = 0
    try:
        root.mkdir(parents=True, exist_ok=True)
        for path in root.rglob("*"):
            if not path.is_file():
                continue
            try:
                stat = path.stat()
            except OSError:
                # The file vanished or is unreadable; leave it out.
                continue
            suffix = path.suffix.lower()
            total_size += stat.st_size
            item = {
                "name": path.name,
                "path": str(path),
                "relative_path": str(path.relative_to(root)),
                "folder": str(path.parent.relative_to(root)) if path.parent != root else "",
                "size": stat.st_size,
                "modified": stat.st_mtime,
                "extension": suffix or "none",
                "is_media": suffix in media_extensions,
                "is_subtitle": suffix in subtitle_extensions,
                "is_incomplete": suffix in incomplete,
            }
            current.append(item)
            if item["is_media"]:
                media_files.append(item)
            elif item["is_subtitle"]:
                subtitle_files.append(item)
    except OSError as exc:
        return empty_snapshot(root, str(exc))

    # Index subtitles by folder.  Subtitles kept in a "subs"/"subtitles"
    # directory are also registered under the parent folder so they attach to
    # the media file one level up.
    subtitles_by_folder = defaultdict(list)
    for subtitle in subtitle_files:
        subtitles_by_folder[subtitle["folder"]].append(subtitle)
        parent = Path(subtitle["folder"])
        if parent.name.lower() in {"subs", "subtitles"}:
            grandparent = str(parent.parent)
            subtitles_by_folder[grandparent if grandparent != "." else ""].append(subtitle)

    bundles = []
    bundled_subtitle_paths = set()
    for media in media_files:
        folder_subtitles = subtitles_by_folder.get(media["folder"], [])
        # Computed once per media file instead of once per subtitle comparison.
        media_stem = Path(media["name"]).stem.lower()
        stem_matches = [
            subtitle for subtitle in subtitle_files
            if subtitle["name"].lower().startswith(media_stem)
        ]
        seen = set()
        subtitles = []
        for subtitle in folder_subtitles + stem_matches:
            if subtitle["path"] in seen:
                continue
            seen.add(subtitle["path"])
            bundled_subtitle_paths.add(subtitle["path"])
            subtitles.append(subtitle)
        bundles.append({
            "media": media,
            "subtitles": sorted(subtitles, key=lambda item: item["name"].lower()),
            "sidecars": [
                item for item in current
                if item["folder"] == media["folder"] and not item["is_media"] and not item["is_subtitle"]
            ][:20],
            "size": media["size"] + sum(item["size"] for item in subtitles),
        })

    loose = [
        item for item in current
        if not item["is_media"] and item["path"] not in bundled_subtitle_paths
    ]

    recent = []
    for item in state.get("items", []):
        source = item.get("source") or ""
        status = item.get("status")
        # Compare path components, not string prefixes: a plain startswith()
        # would also accept sibling directories such as "<root>2/...".
        inside_downloads = bool(source) and Path(source).is_relative_to(root)
        if inside_downloads and status in {"moved", "planned"}:
            recent.append({
                "source": source,
                "destination": item.get("destination"),
                "title": item.get("title"),
                "type": item.get("type"),
                "status": status,
                "drive": item.get("drive"),
                "updated_at": item.get("updated_at"),
            })

    return {
        "path": str(root),
        "generated_at": time.time(),
        "current": sorted(current, key=lambda item: item["modified"], reverse=True),
        "bundles": sorted(bundles, key=lambda item: item["media"]["modified"], reverse=True),
        "loose": sorted(loose, key=lambda item: item["modified"], reverse=True),
        "recent": sorted(recent, key=lambda item: item.get("updated_at") or 0, reverse=True)[:200],
        "counts": {
            "current": len(current),
            "recent": len(recent),
            "media": sum(1 for item in current if item["is_media"]),
            "subtitles": sum(1 for item in current if item["is_subtitle"]),
            "incomplete": sum(1 for item in current if item["is_incomplete"]),
        },
        "total_size": total_size,
        # Same key as the failure snapshot, so both shapes match.
        "error": None,
    }

View File

@@ -0,0 +1,7 @@
"""Health probe: exit non-zero unless the local API answers /api/health with 200."""
from urllib.request import urlopen

with urlopen("http://127.0.0.1:8099/api/health", timeout=3) as response:
    healthy = response.status == 200
if not healthy:
    raise SystemExit(1)

261
dist/sortarr/backend/sortarr/library.py vendored Normal file
View File

@@ -0,0 +1,261 @@
from __future__ import annotations
import os
import re
import time
from collections import Counter
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from .metadata import movie_metadata, series_metadata
from .parser import parse_media
from .storage import drive_stats
# Lower-cased directory names that are treated as library roots on a drive.
LIBRARY_ROOT_NAMES = {"movies", "shows", "tv", "tv shows"}
# The subset of library roots that hold TV content; any other root is a movie root.
TV_ROOT_NAMES = {"shows", "tv", "tv shows"}
# Matches episode markers such as "S01E02", "s1.e2" or "S01 - E002".
EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})")
# Matches season folders such as "Season 01" or "season_2" (case-insensitive).
SEASON_FOLDER_RE = re.compile(r"season[ ._-]*(\d{1,2})", re.I)
# Matches a parenthesised four-digit year starting with 19 or 20, e.g. "(1999)".
YEAR_RE = re.compile(r"\((19\d{2}|20\d{2})\)")
def library_roots(root: Path) -> list[Path]:
    """Return the direct child directories of ``root`` that are library roots.

    A child qualifies when its lower-cased name is in ``LIBRARY_ROOT_NAMES``.
    An unreadable ``root`` yields an empty list.
    """
    try:
        children = list(root.iterdir())
    except OSError:
        return []
    return [
        child for child in children
        if child.is_dir() and child.name.lower() in LIBRARY_ROOT_NAMES
    ]
def library_kind(library_root: Path) -> str:
    """Classify a library root directory as ``"tv"`` or ``"movie"`` by its name."""
    if library_root.name.lower() in TV_ROOT_NAMES:
        return "tv"
    return "movie"
def infer_library_kind(path: str) -> str:
    """Guess the library kind of ``path`` from its directory components.

    Returns ``"tv"`` when any component is a TV root name, ``"movie"`` when a
    component is ``movies``, and ``"other"`` otherwise.  TV wins when both occur.
    """
    components = {part.lower() for part in Path(path).parts}
    if not components.isdisjoint(TV_ROOT_NAMES):
        return "tv"
    return "movie" if "movies" in components else "other"
def split_library_path(path: str) -> tuple[str, list[str]]:
    """Split ``path`` at its library root.

    Returns ``(root_component, components_after_root)``, using the first
    (outermost) path component whose lower-cased name is a library root name.
    When no component matches, returns ``("", all_components)``.

    The components are scanned in path order.  Iterating over the
    ``LIBRARY_ROOT_NAMES`` set instead made the result depend on set iteration
    order whenever a path contained more than one root name (for example a
    movie folder called "Shows" inside "Movies").
    """
    parts = list(Path(path).parts)
    for idx, part in enumerate(parts):
        if part.lower() in LIBRARY_ROOT_NAMES:
            return part, parts[idx + 1:]
    return "", parts
def clean_collection_title(name: str) -> tuple[str, int | None]:
    """Split a folder name like ``"Title (1999)"`` into ``("Title", 1999)``.

    The year is ``None`` when no parenthesised year is present.  Every
    parenthesised year is removed from the title; if nothing is left after
    trimming separators, the original ``name`` is used as the title.
    """
    found = YEAR_RE.search(name)
    year = None if found is None else int(found.group(1))
    stripped = YEAR_RE.sub("", name).strip(" -._")
    title = stripped if stripped else name
    return title, year
# The identity keys below need a title normaliser.  The original code called
# ``clean_title`` without importing or defining it in this module, so every
# call raised NameError.
# NOTE(review): presumably ``clean_title`` was meant to come from ``.parser``
# (confirm).  It is used when available; otherwise a local fallback collapses
# runs of whitespace, dots, underscores and dashes into single spaces.
try:
    from .parser import clean_title
except ImportError:
    def clean_title(title: str) -> str:
        """Fallback normaliser: collapse separator runs to single spaces."""
        collapsed = re.sub(r"[\s._-]+", " ", title).strip()
        return collapsed or title


def item_identity(item: dict) -> dict:
    """Derive the collection identity (kind, title, grouping key) of a library item.

    TV items are grouped by the first folder below the library root and get a
    key of the form ``tv::<title>``; season and episode come from the parsed
    file name, with a "Season N" folder as fallback for the season.
    Everything else is treated as a movie with key ``movie::<title>::<year>``,
    where the year is an empty string when unknown.
    """
    location = item.get("path", "")
    _root, rel = split_library_path(location)
    kind = item.get("library") or infer_library_kind(location)
    parsed = parse_media(item.get("path", item.get("name", "")))

    if kind == "tv" and rel:
        # The first component after the library root is taken as the show folder.
        title = rel[0].strip()
        season = parsed.get("season")
        episode = parsed.get("episode")
        for part in rel:
            match = SEASON_FOLDER_RE.search(part)
            if match and not season:
                season = int(match.group(1))
        return {
            "kind": "tv",
            "title": title,
            "key": f"tv::{clean_title(title).lower()}",
            "season": season,
            "episode": episode,
        }

    # Movies: folder name (or parsed title) with the year split off.
    title, year = clean_collection_title(rel[0] if rel else parsed["title"])
    year_val = year or parsed.get("year") or ""
    return {
        "kind": "movie",
        "title": title,
        "year": year_val,
        "key": f"movie::{clean_title(title).lower()}::{year_val}",
    }
def normalize_library(library: dict) -> dict:
    """Fill in derived fields on a library snapshot, in place, and return it.

    Every item gets a ``library`` kind (inferred from its path when missing),
    ``counts`` is recomputed, and ``collections`` is built from the items when
    the snapshot has none (without metadata enrichment).
    """
    items = library.get("items", [])
    for item in items:
        item["library"] = item.get("library") or infer_library_kind(item.get("path", ""))
    tally = Counter(item["library"] for item in items if item["library"] in {"movie", "tv"})
    library["counts"] = {
        "movies": tally.get("movie", 0),
        "tv": tally.get("tv", 0),
        "total": len(items),
    }
    if "collections" not in library:
        library["collections"] = build_collections({}, items)
    return library
def build_collections(config: dict, items: list[dict], enrich: bool = False) -> dict:
    """Group library items into movies and series.

    Returns ``{"movies": [...], "series": [...]}``, both sorted by title.
    Each series carries a list of seasons (sorted by number), each with a list
    of episodes (sorted by number).  Files without a detected season/episode
    are filed under season/episode ``0``.

    When ``enrich`` is true and ``config`` is non-empty, metadata for every
    movie and show is fetched in parallel (``app.metadata_parallelism``
    workers, clamped to 1..12).  A failed lookup keeps the filename-derived
    metadata and is logged.  Episodes listed in the metadata but absent on
    disk are added with status ``"upcoming"`` (air date in the future) or
    ``"missing"``.
    """
    import logging  # local import: this module has no logger of its own

    movies: dict[str, dict] = {}
    series: dict[str, dict] = {}
    for item in items:
        identity = item_identity(item)
        if identity["kind"] == "tv":
            show = series.setdefault(identity["key"], {
                "key": identity["key"],
                "title": identity["title"],
                "library": "tv",
                "files": [],
                "seasons": {},
                "metadata": {"title": identity["title"], "source": "filename", "seasons": {}},
            })
            show["files"].append(item)
            season_no = identity.get("season") or 0
            episode_no = identity.get("episode") or 0
            season = show["seasons"].setdefault(str(season_no), {"season": season_no, "episodes": {}})
            episode = season["episodes"].setdefault(str(episode_no), {
                "season": season_no,
                "episode": episode_no,
                "title": f"S{season_no:02d}E{episode_no:02d}" if season_no and episode_no else item["name"],
                "files": [],
                "status": "present",
            })
            episode["files"].append(item)
        else:
            movie = movies.setdefault(identity["key"], {
                "key": identity["key"],
                "title": identity["title"],
                "year": identity.get("year"),
                "library": "movie",
                "files": [],
                "metadata": {"title": identity["title"], "source": "filename"},
            })
            movie["files"].append(item)

    if enrich and config:
        workers = int(config.get("app", {}).get("metadata_parallelism", 8))
        tasks = {}
        with ThreadPoolExecutor(max_workers=max(1, min(workers, 12))) as executor:
            for movie in movies.values():
                future = executor.submit(movie_metadata, config, movie["title"], movie.get("year"))
                tasks[future] = movie
            for show in series.values():
                present_seasons = {int(season) for season in show["seasons"] if int(season) > 0}
                future = executor.submit(series_metadata, config, show["title"], present_seasons)
                tasks[future] = show
            for future in as_completed(tasks):
                try:
                    tasks[future]["metadata"] = future.result()
                except Exception:
                    # Enrichment is best effort: keep the filename metadata,
                    # but leave a trace instead of failing silently.
                    logging.getLogger(__name__).warning(
                        "metadata lookup failed for %r",
                        tasks[future].get("title"),
                        exc_info=True,
                    )

    today = time.strftime("%Y-%m-%d")
    for show in series.values():
        # Tolerate a provider that returned None or omitted "seasons".
        season_metadata = (show.get("metadata") or {}).get("seasons") or {}
        for season_no, season_meta in season_metadata.items():
            # Seasons are keyed by string everywhere else in this function.
            season = show["seasons"].setdefault(str(season_no), {"season": int(season_no), "episodes": {}})
            for meta_episode in season_meta.get("episodes") or []:
                key = str(meta_episode.get("episode") or 0)
                existing = season["episodes"].get(key)
                if existing:
                    existing.update({
                        "title": meta_episode.get("title") or existing["title"],
                        "air_date": meta_episode.get("air_date"),
                        "overview": meta_episode.get("overview"),
                        "still": meta_episode.get("still"),
                    })
                else:
                    air_date = meta_episode.get("air_date")
                    season["episodes"][key] = {
                        **meta_episode,
                        "files": [],
                        # ISO dates compare correctly as strings.
                        "status": "upcoming" if air_date and air_date > today else "missing",
                    }
        # Convert the lookup dicts into sorted lists for the final payload.
        for season in show["seasons"].values():
            season["episodes"] = sorted(season["episodes"].values(), key=lambda ep: ep.get("episode") or 0)
        show["seasons"] = sorted(show["seasons"].values(), key=lambda season: season["season"])

    return {
        "movies": sorted(movies.values(), key=lambda movie: movie["title"].lower()),
        "series": sorted(series.values(), key=lambda show: show["title"].lower()),
    }
def library_snapshot(config: dict) -> dict:
    """Scan the library roots on every configured drive and summarise them.

    The walk stops once ``app.library_scan_max_files`` files were visited or
    ``app.library_scan_timeout_seconds`` elapsed; ``truncated`` is then set on
    the result.  Media files become ``items`` (newest first); every visited
    file is counted per extension.  Collections are built with metadata
    enrichment enabled.
    """
    app = config["app"]
    max_files = int(app.get("library_scan_max_files", 20000))
    deadline = time.monotonic() + int(app.get("library_scan_timeout_seconds", 8))
    media_extensions = app.get("media_extensions", [])
    subtitle_extensions = app.get("subtitle_extensions", [])
    ignored_dirs = {"$RECYCLE.BIN", "System Volume Information", ".Trash-1000"}

    items = []
    extensions = Counter()
    scanned = 0
    truncated = False

    def budget_spent() -> bool:
        """True once the file-count or time budget is used up."""
        return scanned >= max_files or time.monotonic() >= deadline

    for drive in config.get("drives", []):
        if budget_spent():
            truncated = True
            break
        root = Path(drive["path"])
        if not root.exists():
            continue
        for library_root in library_roots(root):
            kind = library_kind(library_root)
            for current, dirs, files in os.walk(library_root, onerror=lambda error: None):
                if budget_spent():
                    truncated = True
                    break
                # Prune in place so os.walk does not descend into ignored directories.
                dirs[:] = [name for name in dirs if name not in ignored_dirs]
                lower_files = {name.lower() for name in files}
                for filename in files:
                    if budget_spent():
                        truncated = True
                        break
                    path = Path(current) / filename
                    try:
                        stat = path.stat()
                    except OSError:
                        continue
                    scanned += 1
                    suffix = path.suffix.lower()
                    extensions[suffix or "none"] += 1
                    if suffix not in media_extensions:
                        continue
                    # A subtitle counts only when it sits in the same folder
                    # and shares the media file's exact stem.
                    subtitle_names = [f"{path.stem}{ext}".lower() for ext in subtitle_extensions]
                    items.append({
                        "path": str(path),
                        "name": path.name,
                        "drive": drive["id"],
                        "library": kind,
                        "root": library_root.name,
                        "size": stat.st_size,
                        "modified": stat.st_mtime,
                        "has_subtitles": any(name in lower_files for name in subtitle_names),
                    })

    snapshot = {
        "drives": drive_stats(config),
        "items": sorted(items, key=lambda item: item["modified"], reverse=True),
        "collections": build_collections(config, items, enrich=True),
        "extensions": dict(extensions.most_common()),
        "scanned_files": scanned,
        "truncated": truncated,
    }
    return normalize_library(snapshot)

View File

@@ -0,0 +1,25 @@
from __future__ import annotations
import sys
import logging
from logging.handlers import RotatingFileHandler
from pathlib import Path
def configure_logging(log_dir: str, level: str) -> None:
    """Reset the root logger to log to the console and to a rotating file.

    The file is ``<log_dir>/sortarr.log`` (5 MB per file, 5 backups).  An
    unknown ``level`` name falls back to INFO.  If the log file cannot be
    opened, a notice is printed to stderr and console logging stays active.
    """
    directory = Path(log_dir)
    directory.mkdir(parents=True, exist_ok=True)
    formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s")

    root = logging.getLogger()
    root.setLevel(getattr(logging, level.upper(), logging.INFO))
    root.handlers.clear()

    console = logging.StreamHandler()
    console.setFormatter(formatter)
    root.addHandler(console)

    try:
        rotating = RotatingFileHandler(directory / "sortarr.log", maxBytes=5_000_000, backupCount=5)
    except OSError as exc:
        print(f"Sortarr could not open file logging in {log_dir}: {exc}", file=sys.stderr)
        return
    rotating.setFormatter(formatter)
    root.addHandler(rotating)

View File

@@ -0,0 +1,121 @@
from __future__ import annotations
import json
import os
import subprocess
from pathlib import Path
from .cache import get_json, remove_json, set_json
def _allowed_roots(config: dict) -> list[Path]:
    """Return the resolved directories media operations may touch.

    These are every configured drive path, followed by the downloads directory.
    """
    candidates = [drive["path"] for drive in config.get("drives", [])]
    candidates.append(config["paths"]["downloads"])
    return [Path(candidate).resolve() for candidate in candidates]
def assert_allowed_path(config: dict, path: str) -> Path:
    """Resolve ``path`` and ensure it lies inside an allowed root.

    Returns the resolved path.  Raises ``ValueError`` when the resolved path is
    not inside any configured drive or the downloads directory.
    """
    target = Path(path).resolve()
    if any(target.is_relative_to(root) for root in _allowed_roots(config)):
        return target
    raise ValueError("path is outside configured media and downloads roots")
def media_probe(config: dict, path: str) -> dict:
    """Run ``ffprobe`` on an allowed media file and return its streams by type.

    Successful results are cached under a key built from the resolved path,
    file size and whole-second mtime.

    Args:
        config: Application configuration.
        path: File to inspect; must resolve inside an allowed root.

    Returns:
        On success a dict with ``status == "ok"`` plus ``format``, ``audio``,
        ``subtitles``, ``video`` and ``cache_key``. On failure a dict with
        ``status == "failed"`` and a ``stderr`` message; failures are not cached.

    Raises:
        ValueError: If ``path`` is outside the allowed roots.
        OSError: If the file cannot be stat'ed or ``ffprobe`` cannot be started.
    """
    target = assert_allowed_path(config, path)
    stat = target.stat()
    cache_key = f"{target}:{stat.st_size}:{int(stat.st_mtime)}"
    cached = get_json(config, "ffprobe", cache_key)
    if cached is not None:
        return cached
    command = [
        "ffprobe",
        "-v",
        # "error" rather than "quiet": the failure path reports stderr, which
        # "quiet" would leave empty.
        "error",
        "-print_format",
        "json",
        "-show_format",
        "-show_streams",
        str(target),
    ]
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, errors="replace", timeout=60
        )
    except subprocess.TimeoutExpired:
        # Report a hung probe the same way as any other probe failure.
        return {"path": str(target), "status": "failed", "stderr": "ffprobe timed out after 60 seconds"}
    if completed.returncode != 0:
        return {"path": str(target), "status": "failed", "stderr": completed.stderr[-4000:]}
    try:
        payload = json.loads(completed.stdout or "{}")
    except json.JSONDecodeError as exc:
        return {"path": str(target), "status": "failed", "stderr": f"ffprobe returned invalid JSON: {exc}"}
    streams = payload.get("streams", [])
    result = {
        "path": str(target),
        "cache_key": cache_key,
        "status": "ok",
        "format": payload.get("format", {}),
        "audio": [stream for stream in streams if stream.get("codec_type") == "audio"],
        "subtitles": [stream for stream in streams if stream.get("codec_type") == "subtitle"],
        "video": [stream for stream in streams if stream.get("codec_type") == "video"],
    }
    set_json(config, "ffprobe", cache_key, result)
    return result
def _stream_type_positions(probe: dict) -> dict[int, tuple[str, int]]:
positions = {"audio": 0, "subtitle": 0, "video": 0}
result = {}
for stream in probe.get("video", []) + probe.get("audio", []) + probe.get("subtitles", []):
codec_type = stream.get("codec_type")
if codec_type not in positions:
continue
result[int(stream["index"])] = (codec_type, positions[codec_type])
positions[codec_type] += 1
return result
def edit_track(config: dict, path: str, action: str, stream_index: int) -> dict:
    """Remove an audio/subtitle stream or mark it as default by remuxing with ffmpeg.

    Args:
        config: Application configuration; ``app.dry_run`` suppresses execution.
        path: Media file to edit; must resolve inside an allowed root.
        action: ``"remove"`` or ``"set-default"``.
        stream_index: Container-level stream index as reported by the probe.

    Returns:
        A dict whose ``status`` is ``"dry-run"``, ``"failed"`` or ``"updated"``.

    Raises:
        ValueError: If the path is not allowed, the stream index is unknown,
            the stream is not audio/subtitle, or the action is unsupported.
    """
    target = assert_allowed_path(config, path)
    probe = media_probe(config, str(target))
    positions = _stream_type_positions(probe)
    if stream_index not in positions:
        raise ValueError("stream index was not found")
    codec_type, type_index = positions[stream_index]
    if codec_type not in {"audio", "subtitle"}:
        raise ValueError("only audio and subtitle streams can be edited here")
    # Keep the real container extension last: ffmpeg picks the output muxer
    # from the file extension and cannot choose one for "*.tracksorting".
    tmp = target.with_name(f"{target.stem}.tracksorting{target.suffix}")
    if action == "remove":
        command = ["ffmpeg", "-hide_banner", "-y", "-i", str(target), "-map", "0", "-map", f"-0:{stream_index}", "-c", "copy", str(tmp)]
    elif action == "set-default":
        spec = "a" if codec_type == "audio" else "s"
        command = [
            "ffmpeg",
            "-hide_banner",
            "-y",
            "-i",
            str(target),
            "-map",
            "0",
            "-c",
            "copy",
            # Clear the default flag on every stream of this type, then set it
            # on the chosen one (addressed by its per-type position).
            f"-disposition:{spec}",
            "0",
            f"-disposition:{spec}:{type_index}",
            "default",
            str(tmp),
        ]
    else:
        raise ValueError("unsupported track action")
    if config["app"].get("dry_run"):
        return {"status": "dry-run", "path": str(target), "action": action, "stream_index": stream_index, "command": command}
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, errors="replace", timeout=60 * 60
        )
    except subprocess.TimeoutExpired:
        # Do not leave a partial remux next to the original file.
        tmp.unlink(missing_ok=True)
        return {"status": "failed", "returncode": None, "stderr": "ffmpeg timed out", "command": command}
    if completed.returncode != 0:
        tmp.unlink(missing_ok=True)
        return {"status": "failed", "returncode": completed.returncode, "stderr": completed.stderr[-4000:], "command": command}
    os.replace(tmp, target)
    # The file changed, so the cached probe for the old contents is stale.
    remove_json(config, "ffprobe", probe.get("cache_key", ""))
    return {"status": "updated", "path": str(target), "action": action, "stream_index": stream_index}

216
dist/sortarr/backend/sortarr/metadata.py vendored Normal file
View File

@@ -0,0 +1,216 @@
from __future__ import annotations
import json
from urllib.error import HTTPError, URLError
from urllib.parse import urlencode
from urllib.request import Request, urlopen
from .cache import get_json, set_json
TMDB_BASE = "https://api.themoviedb.org/3"
TMDB_TTL_SECONDS = 7 * 24 * 60 * 60
def _auth(config: dict) -> tuple[dict[str, str], str | None]:
meta = config.get("metadata", {})
token = meta.get("tmdb_bearer_token") or ""
api_key = meta.get("tmdb_api_key") or ""
headers = {"Accept": "application/json"}
if token:
headers["Authorization"] = f"Bearer {token}"
return headers, api_key or None
def tmdb_available(config: dict) -> bool:
    """Return True when TMDb is enabled and a bearer token or API key is set."""
    settings = config.get("metadata", {})
    enabled = settings.get("tmdb_enabled", True)
    credentials = settings.get("tmdb_bearer_token") or settings.get("tmdb_api_key")
    return bool(credentials) if enabled else False
def poster_url(config: dict, path: str | None) -> str | None:
if not path:
return None
return f"{config.get('metadata', {}).get('tmdb_image_base', 'https://image.tmdb.org/t/p/w342')}{path}"
def tmdb_get(config: dict, endpoint: str, params: dict | None = None) -> dict:
    """GET a TMDb endpoint and return the decoded JSON, using the ``tmdb`` cache.

    The cache key is the endpoint plus the sorted query without ``api_key``.
    Network and decoding errors are not handled here; they propagate to the
    caller.
    """
    headers, api_key = _auth(config)
    default_language = config.get("metadata", {}).get("tmdb_language", "en-US")
    query = {**(params or {})}
    if "language" not in query:
        query["language"] = default_language
    if api_key:
        query["api_key"] = api_key
    url = f"{TMDB_BASE}{endpoint}?{urlencode(query)}"
    # Leave the credential out of the cache key.
    cacheable = sorted(pair for pair in query.items() if pair[0] != "api_key")
    cache_key = f"{endpoint}?{urlencode(cacheable)}"
    hit = get_json(config, "tmdb", cache_key, TMDB_TTL_SECONDS)
    if hit is not None:
        return hit
    timeout = int(config.get("app", {}).get("organization_metadata_timeout_seconds", 3))
    request = Request(url, headers=headers)
    with urlopen(request, timeout=timeout) as response:
        payload = json.loads(response.read().decode())
    set_json(config, "tmdb", cache_key, payload)
    return payload
def test_tmdb(config: dict) -> dict:
    """Check the configured TMDb credentials against ``/configuration``.

    Always returns a dict with ``ok``, ``status`` and ``message``; on success
    it also carries ``image_base`` and ``poster_sizes``. Errors are reported
    in the result instead of being raised.
    """
    settings = config.get("metadata", {})
    if not settings.get("tmdb_enabled", True):
        return {"ok": False, "status": "disabled", "message": "TMDb is disabled in settings."}
    headers, api_key = _auth(config)
    if "Authorization" not in headers and not api_key:
        return {"ok": False, "status": "missing-credentials", "message": "No TMDb API key or bearer token is configured."}
    query = {"language": settings.get("tmdb_language", "en-US")}
    if api_key:
        query["api_key"] = api_key
    url = f"{TMDB_BASE}/configuration?{urlencode(query)}"
    timeout = int(config.get("app", {}).get("organization_metadata_timeout_seconds", 3))
    try:
        with urlopen(Request(url, headers=headers), timeout=timeout) as response:
            payload = json.loads(response.read().decode())
        image_info = payload.get("images") or {}
        return {
            "ok": True,
            "status": "connected",
            "message": "TMDb accepted the configured credentials.",
            "image_base": image_info.get("secure_base_url") or image_info.get("base_url"),
            "poster_sizes": image_info.get("poster_sizes") or [],
        }
    except HTTPError as exc:
        return {"ok": False, "status": f"http-{exc.code}", "message": f"TMDb returned HTTP {exc.code}."}
    except (TimeoutError, URLError) as exc:
        return {"ok": False, "status": "network-error", "message": str(exc)}
    except Exception as exc:
        # Anything else (e.g. an undecodable body) is still reported, not raised.
        return {"ok": False, "status": "error", "message": str(exc)}
def first_result(config: dict, media_type: str, title: str, year: int | None = None) -> dict | None:
    """Return the first TMDb search hit for ``title``, or None.

    None is returned when TMDb is unavailable, the title is empty, the request
    fails, or there are no results. The year is sent as ``year`` for movies
    and ``first_air_date_year`` for every other media type.
    """
    if not tmdb_available(config) or not title:
        return None
    params = {"query": title}
    if year:
        year_field = "year" if media_type == "movie" else "first_air_date_year"
        params[year_field] = year
    try:
        payload = tmdb_get(config, f"/search/{media_type}", params)
    except Exception:
        return None
    matches = payload.get("results") or []
    if not matches:
        return None
    return matches[0]
def movie_metadata(config: dict, title: str, year: int | None = None) -> dict:
    """Return TMDb metadata for a movie, or a filename-only stub when nothing matches."""
    hit = first_result(config, "movie", title, year)
    if hit:
        return {
            "source": "tmdb",
            "tmdb_id": hit.get("id"),
            "title": hit.get("title") or title,
            "overview": hit.get("overview") or "",
            "poster": poster_url(config, hit.get("poster_path")),
            "backdrop": poster_url(config, hit.get("backdrop_path")),
            "release_date": hit.get("release_date"),
            "vote_average": hit.get("vote_average"),
        }
    return {"title": title, "source": "filename"}
def series_metadata(config: dict, title: str, seasons: set[int]) -> dict:
    """Return TMDb metadata for a series plus episode lists for ``seasons``.

    Falls back to a filename-only stub when the title search finds nothing.
    A season whose request fails is left out of the ``seasons`` mapping.
    """
    hit = first_result(config, "tv", title)
    if not hit:
        return {"title": title, "source": "filename", "seasons": {}}
    show_id = hit.get("id")
    season_map: dict = {}
    metadata = {
        "source": "tmdb",
        "tmdb_id": show_id,
        "title": hit.get("name") or title,
        "overview": hit.get("overview") or "",
        "poster": poster_url(config, hit.get("poster_path")),
        "backdrop": poster_url(config, hit.get("backdrop_path")),
        "first_air_date": hit.get("first_air_date"),
        "vote_average": hit.get("vote_average"),
        "seasons": season_map,
    }
    for number in sorted(seasons):
        try:
            payload = tmdb_get(config, f"/tv/{show_id}/season/{number}")
        except Exception:
            continue
        listed = payload.get("episodes") or []
        episodes = []
        for entry in listed:
            episodes.append({
                "season": number,
                "episode": entry.get("episode_number"),
                "title": entry.get("name"),
                "overview": entry.get("overview") or "",
                "air_date": entry.get("air_date"),
                "still": poster_url(config, entry.get("still_path")),
            })
        season_map[str(number)] = {
            "name": payload.get("name"),
            "air_date": payload.get("air_date"),
            "episode_count": len(listed),
            "episodes": episodes,
        }
    return metadata
def search_tmdb(config: dict, media_type: str, title: str) -> list[dict]:
    """Return simplified TMDb search results for ``title``.

    An empty list is returned when TMDb is unavailable, the title is empty,
    or the request fails.
    """
    if not tmdb_available(config) or not title:
        return []
    try:
        payload = tmdb_get(config, f"/search/{media_type}", {"query": title})
    except Exception:
        return []
    summaries = []
    for hit in payload.get("results") or []:
        summaries.append({
            "tmdb_id": hit.get("id"),
            "title": hit.get("title") or hit.get("name"),
            "overview": hit.get("overview"),
            "poster": poster_url(config, hit.get("poster_path")),
            "release_date": hit.get("release_date") or hit.get("first_air_date"),
        })
    return summaries
def identify_item(config: dict, item: dict, tmdb_id: int, media_type: str) -> dict:
    """Attach TMDb metadata for an explicitly chosen ``tmdb_id`` to ``item``.

    Args:
        config: Application configuration.
        item: Library item; mutated in place and also returned.
        tmdb_id: TMDb identifier selected for this item.
        media_type: ``"movie"`` or ``"tv"``; other values leave ``item`` untouched.

    Returns:
        ``item``, with ``item["metadata"]`` replaced when the lookup succeeds.
        The lookup is best-effort: on any failure the item is returned unchanged.
    """
    if not tmdb_available(config):
        return item
    try:
        if media_type == "movie":
            payload = tmdb_get(config, f"/movie/{tmdb_id}")
            item["metadata"] = {
                "source": "tmdb",
                "tmdb_id": tmdb_id,
                "title": payload.get("title") or item.get("title"),
                "overview": payload.get("overview") or "",
                "poster": poster_url(config, payload.get("poster_path")),
                "backdrop": poster_url(config, payload.get("backdrop_path")),
                "release_date": payload.get("release_date"),
                "vote_average": payload.get("vote_average"),
            }
        elif media_type == "tv":
            present_seasons = {int(s["season"]) for s in item.get("seasons", []) if s.get("season")}
            payload = tmdb_get(config, f"/tv/{tmdb_id}")
            metadata = {
                "source": "tmdb",
                "tmdb_id": tmdb_id,
                "title": payload.get("name") or item.get("title"),
                "overview": payload.get("overview") or "",
                "poster": poster_url(config, payload.get("poster_path")),
                "backdrop": poster_url(config, payload.get("backdrop_path")),
                "first_air_date": payload.get("first_air_date"),
                "vote_average": payload.get("vote_average"),
                "seasons": {},
            }
            # Fetch seasons by the chosen id. Searching by title again could
            # return a different show's seasons than the one selected.
            for season in sorted(present_seasons):
                try:
                    season_payload = tmdb_get(config, f"/tv/{tmdb_id}/season/{season}")
                except Exception:
                    continue
                episodes = season_payload.get("episodes") or []
                metadata["seasons"][str(season)] = {
                    "name": season_payload.get("name"),
                    "air_date": season_payload.get("air_date"),
                    "episode_count": len(episodes),
                    "episodes": [
                        {
                            "season": season,
                            "episode": episode.get("episode_number"),
                            "title": episode.get("name"),
                            "overview": episode.get("overview") or "",
                            "air_date": episode.get("air_date"),
                            "still": poster_url(config, episode.get("still_path")),
                        }
                        for episode in episodes
                    ],
                }
            item["metadata"] = metadata
    except Exception:
        # Deliberate best-effort: a failed lookup keeps the existing metadata.
        pass
    return item

View File

@@ -0,0 +1,293 @@
from __future__ import annotations
import logging
import os
import shutil
import time
import hashlib
from pathlib import Path
from .metadata import movie_metadata, series_metadata, tmdb_available
from .parser import parse_media
from .storage import choose_drive
LOG = logging.getLogger(__name__)
# Lower-case filename tokens mapped to the three-letter language code used in
# subtitle file suffixes. Several tokens may map to the same code.
LANGUAGE_HINTS = {
"eng": "eng",
"english": "eng",
"en": "eng",
"spa": "spa",
"spanish": "spa",
"fre": "fre",
"french": "fre",
"ger": "ger",
"german": "ger",
"ita": "ita",
"jpn": "jpn",
"japanese": "jpn",
"kor": "kor",
}
def safe_name(value: str) -> str:
    """Drop the characters ``<>:"/\\|?*``, trim whitespace and trailing dots.

    Returns ``"Unknown"`` when nothing is left.
    """
    without_reserved = value.translate(str.maketrans("", "", '<>:"/\\|?*'))
    cleaned = without_reserved.strip().rstrip(".")
    return cleaned if cleaned else "Unknown"
def format_destination(config: dict, media: dict, drive: dict) -> Path:
    """Build the destination path on ``drive`` from the library templates.

    Episodes and season packs use the series folder template. Everything else
    uses the movie folder template when a year is known and the unknown-folder
    template otherwise. Missing season/episode numbers are formatted as 1 and
    a missing year as ``"Unknown Year"``.
    """
    templates = config["library"]
    clean_title = safe_name(media["title"])
    year_label = media.get("year") or "Unknown Year"
    kind = media["type"]
    if kind == "episode":
        folder_tpl, file_tpl = templates["series_folder"], templates["episode_file"]
    elif kind == "season":
        folder_tpl = templates["series_folder"]
        file_tpl = "{title} - Season {season:02d}{quality}{ext}"
    else:
        folder_tpl = templates["movie_folder"] if media.get("year") else templates["unknown_folder"]
        file_tpl = templates["movie_file"]
    fields = dict(media)
    fields.update({
        "title": clean_title,
        "year": year_label,
        "season": media.get("season") or 1,
        "episode": media.get("episode") or 1,
        "episode_title": safe_name(media.get("episode_title") or "Episode"),
        "ext": media["extension"],
    })
    return Path(drive["path"]) / folder_tpl.format(**fields) / file_tpl.format(**fields)
def language_suffix(path: Path) -> str:
    """Return ``".<code>"`` for the first language hint found in the file stem, else ``""``.

    The stem is lower-cased and split on dots, underscores and whitespace;
    hints are tried in ``LANGUAGE_HINTS`` order.
    """
    words = set(path.stem.lower().replace(".", " ").replace("_", " ").split())
    return next(
        (f".{code}" for token, code in LANGUAGE_HINTS.items() if token in words),
        "",
    )
def unique_planned_path(path: Path, rule: str, reserved: set[str]) -> Path | None:
    """Pick a destination not yet claimed in ``reserved`` and record it there.

    The collision rule is applied first; None means the rule says to skip.
    If the resulting path is already reserved, ``.2`` … ``.999`` are inserted
    before the suffix until a name is found that neither exists on disk nor
    is reserved.

    Raises:
        RuntimeError: If every numbered alternative is taken.
    """
    base = collision_path(path, rule)
    if not base:
        return None
    base_key = str(base)
    if base_key not in reserved:
        reserved.add(base_key)
        return base
    for number in range(2, 1000):
        alternative = base.with_name(f"{base.stem}.{number}{base.suffix}")
        if alternative.exists() or str(alternative) in reserved:
            continue
        reserved.add(str(alternative))
        return alternative
    raise RuntimeError(f"Could not find collision-free name for {path}")
def tmdb_episode_title(metadata: dict, season: int | None, episode: int | None) -> str | None:
if not season or not episode:
return None
season_data = metadata.get("seasons", {}).get(str(season), {})
for item in season_data.get("episodes", []):
if item.get("episode") == episode and item.get("title"):
return item["title"]
return None
def plan_id(source: str) -> str:
    """Return the first 16 hex digits of the SHA-256 of ``source``."""
    digest = hashlib.sha256(source.encode())
    return digest.hexdigest()[:16]
def quality_score(media: dict) -> int:
    """Rank ``media["quality"]`` by resolution: 2160→4, 1080→3, 720→2, 480→1, else 0."""
    label = media.get("quality", "").lower()
    for rank, marker in ((4, "2160"), (3, "1080"), (2, "720"), (1, "480")):
        if marker in label:
            return rank
    return 0
def confidence(config: dict, media: dict, metadata_enabled: bool = True) -> tuple[int, list[str], dict]:
    """Score how trustworthy the parsed ``media`` is.

    Returns:
        ``(score capped at 100, reasons, metadata)``. ``metadata`` is the TMDb
        lookup result for movies and episodes when lookups are enabled and
        available, otherwise a filename-only stub. When TMDb is available but
        the lookup was not performed, ``"metadata deferred"`` is recorded.
    """
    title = media["title"]
    kind = media["type"]
    metadata = {"source": "filename", "title": title}
    checks = (
        (title != "Unknown" and len(title) > 2, 20, "title parsed"),
        (kind == "episode" and media.get("season") and media.get("episode"), 35, "season and episode parsed"),
        (kind == "movie" and media.get("year"), 25, "year parsed"),
        (media.get("quality"), 5, "quality parsed"),
    )
    score = 20
    reasons = []
    for passed, points, label in checks:
        if passed:
            score += points
            reasons.append(label)
    available = tmdb_available(config)
    if available and metadata_enabled:
        if kind == "movie":
            metadata = movie_metadata(config, title, media.get("year"))
        elif kind == "episode":
            metadata = series_metadata(config, title, {media.get("season") or 1})
        if metadata.get("source") == "tmdb":
            score += 20
            reasons.append("TMDb match")
    elif available:
        reasons.append("metadata deferred")
    return min(score, 100), reasons, metadata
def plan_bundle(config: dict, bundle: dict, metadata_enabled: bool = True) -> dict:
    """Plan the move of a media file and its subtitles.

    Returns a plan dict containing the chosen destination (None when the
    collision rule says skip), confidence score and reasons, planned subtitle
    moves, and a ``status`` of ``"skipped"``, ``"ready"``, ``"needs-review"``
    or ``"low-confidence"`` derived from the configured thresholds.
    """
    media_file = Path(bundle["media"]["path"])
    media = parse_media(str(media_file))
    score, reasons, metadata = confidence(config, media, metadata_enabled)
    # The drive is chosen before the TMDb title is copied onto ``media``.
    drive = choose_drive(config, metadata.get("title") or media["title"])

    if metadata.get("source") == "tmdb":
        media["title"] = metadata.get("title") or media["title"]
        kind = media["type"]
        if kind == "movie" and metadata.get("release_date") and not media.get("year"):
            media["year"] = int(metadata["release_date"][:4])
        if kind == "episode":
            media["episode_title"] = (
                tmdb_episode_title(metadata, media.get("season"), media.get("episode"))
                or media.get("episode_title")
                or "Episode"
            )

    planned = format_destination(config, media, drive)
    final = collision_path(planned, config["library"].get("collision", "keep-both"))

    subtitle_moves = []
    if final:
        reserved = {str(final)}
        for entry in bundle.get("subtitles", []):
            sub_source = Path(entry["path"])
            # Subtitles without a recognisable language are tagged "und".
            lang = language_suffix(sub_source) or ".und"
            wanted = final.with_name(f"{final.stem}{lang}{sub_source.suffix.lower()}")
            sub_final = unique_planned_path(wanted, config["library"].get("collision", "keep-both"), reserved)
            subtitle_moves.append({
                "source": str(sub_source),
                "destination": str(sub_final) if sub_final else None,
                "language": lang.lstrip(".") or None,
            })

    auto_threshold = int(config["app"].get("auto_move_min_confidence", 90))
    review_threshold = int(config["app"].get("review_min_confidence", 60))
    if not final:
        status = "skipped"
    elif score >= auto_threshold:
        status = "ready"
    else:
        status = "needs-review" if score >= review_threshold else "low-confidence"

    return {
        "id": plan_id(str(media_file)),
        "source": str(media_file),
        "destination": str(final) if final else None,
        "media": media,
        "metadata": metadata,
        "drive": drive["id"],
        "confidence": score,
        "reasons": reasons,
        "status": status,
        "subtitles": subtitle_moves,
        "sidecars": bundle.get("sidecars", []),
        "updated_at": time.time(),
    }
def collision_path(path: Path, rule: str) -> Path | None:
if not path.exists():
return path
if rule == "skip":
return None
if rule == "replace":
return path
stem, suffix = path.stem, path.suffix
for idx in range(2, 1000):
candidate = path.with_name(f"{stem} ({idx}){suffix}")
if not candidate.exists():
return candidate
raise RuntimeError(f"Could not find collision-free name for {path}")
def write_nfo(path: Path, media: dict) -> None:
    """Write a minimal ``.nfo`` XML file next to ``path``.

    The root element is ``<movie>`` for movies and ``<episodedetails>`` for
    every other type. ``year``, ``season`` and ``episode`` are emitted only
    when truthy.

    Args:
        path: Media file path; the NFO uses the same name with ``.nfo``.
        media: Parsed media dict with at least ``type`` and ``title``.
    """
    from xml.sax.saxutils import escape

    root_tag = "movie" if media["type"] == "movie" else "episodedetails"
    # Escape text content: titles such as "Fast & Furious" would otherwise
    # produce malformed XML.
    body = [
        f"<{root_tag}>",
        f" <title>{escape(str(media['title']))}</title>",
    ]
    for field in ("year", "season", "episode"):
        if media.get(field):
            body.append(f" <{field}>{escape(str(media[field]))}</{field}>")
    body.append(f"</{root_tag}>")
    # XML without a declaration is read as UTF-8, so write it as UTF-8
    # regardless of the process locale.
    path.with_suffix(".nfo").write_text("\n".join(body) + "\n", encoding="utf-8")
def plan_file(config: dict, source: Path) -> dict:
    """Plan a single-file move from the filename alone.

    The resulting ``action`` is ``"skip"`` when the collision rule rejects the
    destination, ``"dry-run"`` when ``app.dry_run`` is set, and ``"move"``
    otherwise.
    """
    media = parse_media(str(source))
    drive = choose_drive(config, media["title"])
    planned = format_destination(config, media, drive)
    final = collision_path(planned, config["library"].get("collision", "keep-both"))
    if final is None:
        action = "skip"
    elif config["app"].get("dry_run"):
        action = "dry-run"
    else:
        action = "move"
    return {
        "source": str(source),
        "destination": str(final) if final else None,
        "media": media,
        "drive": drive["id"],
        "action": action,
    }
def execute_plan(config: dict, plan: dict) -> dict:
    """Carry out a plan produced by ``plan_file``.

    Returns a copy of ``plan`` with ``status`` set to ``"skipped"``,
    ``"planned"`` (dry run) or ``"moved"``. A real move goes through a
    ``.sorting`` staging name, then applies ``library.permissions_mode`` and
    optionally writes an NFO file.
    """
    nothing_to_do = not plan.get("destination") or plan["action"] == "skip"
    if nothing_to_do:
        return plan | {"status": "skipped"}
    source = Path(plan["source"])
    destination = Path(plan["destination"])
    if config["app"].get("dry_run"):
        return plan | {"status": "planned"}
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Stage under a temporary name, then rename into place.
    staging = destination.with_suffix(destination.suffix + ".sorting")
    if staging.exists():
        staging.unlink()
    shutil.move(str(source), str(staging))
    staging.replace(destination)
    permissions = int(str(config["library"].get("permissions_mode", "664")), 8)
    os.chmod(destination, permissions)
    if config.get("metadata", {}).get("write_nfo", True):
        write_nfo(destination, plan["media"])
    LOG.info("Moved %s to %s", source, destination)
    return plan | {"status": "moved", "completed_at": time.time()}
def execute_bundle_plan(config: dict, plan: dict, force: bool = False) -> dict:
    """Carry out a plan produced by ``plan_bundle``, including its subtitles.

    Returns a copy of ``plan`` with ``result`` set to ``"held"`` (no
    destination, or a skipped / low-confidence / needs-review plan without
    ``force``), ``"dry-run"``, or ``"moved"``.
    """
    if not plan.get("destination") or (plan["status"] in {"skipped", "low-confidence"} and not force):
        return plan | {"result": "held"}
    if plan["status"] == "needs-review" and not force:
        return plan | {"result": "held"}
    if config["app"].get("dry_run"):
        return plan | {"result": "dry-run"}
    source = Path(plan["source"])
    destination = Path(plan["destination"])
    destination.parent.mkdir(parents=True, exist_ok=True)
    # Stage under a temporary name, then rename into place.
    staging = destination.with_suffix(destination.suffix + ".sorting")
    if staging.exists():
        staging.unlink()
    shutil.move(str(source), str(staging))
    staging.replace(destination)
    permissions = int(str(config["library"].get("permissions_mode", "664")), 8)
    os.chmod(destination, permissions)
    for entry in plan.get("subtitles", []):
        sub_source = Path(entry["source"])
        # Skip subtitles that vanished or were planned without a destination.
        if not sub_source.exists() or not entry.get("destination"):
            continue
        sub_dest = Path(entry["destination"])
        sub_dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(sub_source), str(sub_dest))
        os.chmod(sub_dest, permissions)
    if config.get("metadata", {}).get("write_nfo", True):
        write_nfo(destination, plan["media"])
    return plan | {"status": "moved", "result": "moved", "completed_at": time.time()}

143
dist/sortarr/backend/sortarr/parser.py vendored Normal file
View File

@@ -0,0 +1,143 @@
from __future__ import annotations
import re
from pathlib import Path
# Resolution and source tags (2160p, bluray, web-dl, ...); group 1 is the tag.
QUALITY_RE = re.compile(r"\b(2160p|1080p|720p|480p|576p|remux|bluray|web[- .]?dl|webrip|hdtv|dvdrip|dvd|brrip|bdrip)\b", re.I)
# Stand-alone four-digit year starting with 19 or 20.
YEAR_RE = re.compile(r"\b(19\d{2}|20\d{2})\b")
# "S01E02" style marker; group 3 is an optional second episode number ("S01E02E03").
EPISODE_RE = re.compile(r"[Ss](\d{1,2})[ ._-]*[Ee](\d{1,3})(?:[ ._-]*[Ee](\d{1,3}))?")
# "1x02" style marker; groups 3 and 4 are an optional second "NxM" pair.
ALT_EPISODE_RE = re.compile(r"\b(\d{1,2})x(\d{1,3})(?:[ ._-]*(\d{1,2})x(\d{1,3}))?\b")
# Season-only marker such as "S01" or "Season 1".
SEASON_RE = re.compile(r"\b[Ss](?:eason)?[ ._-]*(\d{1,2})\b")
# A segment enclosed in square brackets or parentheses.
BRACKET_RE = re.compile(r"[\[(][^\])]*(?:\]|\))")
# Audio codec and channel-layout tags.
AUDIO_RE = re.compile(r"\b(?:aac|aac\d(?:[ ._-]?\d)?|ac3|eac3|ddp(?:\d(?:[ ._-]?\d)?)?|dts(?:-hd|hd|x)?|truehd|atmos|flac|mp3|opus|5[ ._-]?1|7[ ._-]?1|2[ ._-]?0|6ch|2ch)\b", re.I)
# Video codec, bit-depth and HDR tags.
CODEC_RE = re.compile(r"\b(?:x264|x265|h[ ._-]?264|h[ ._-]?265|hevc|avc|av1|vc1|vp9|10bit|8bit|hdr|hdr10|dv|dolby[ ._-]?vision)\b", re.I)
# Edition and release-flag words (proper, repack, extended, ...).
EDITION_RE = re.compile(r"\b(?:proper|repack|rerip|extended|unrated|directors?[ ._-]?cut|theatrical|imax|multi|line|dubbed|subbed|limited|internal)\b", re.I)
# A fixed list of release-group / service tags, at the start or after a separator.
RELEASE_GROUP_RE = re.compile(r"(?:^|[ ._-])(?:YTS|TGx|EZTVx?|MeGusta|PSA|RARBG|NTb|AMZN|DSNP|PMNTP|FLUX|SuccessfulCrab|GalaxyTV|VXT|QxR|TIGOLE|UTR|SARTRE|KOGI|ANONYMOUS|SNEAKY|EVO|FGT)\b", re.I)
# Trailing "-GROUP" style suffix (2 to 25 characters) at the very end of the name.
TRAILING_GROUP_RE = re.compile(r"(?:[ ._-]+-[ ._-]*[A-Za-z0-9][A-Za-z0-9._-]{1,24})$")
def spaced(text: str) -> str:
    """Collapse dot/underscore/whitespace runs to single spaces and trim the ends.

    NOTE(review): every cleaner in this module calls ``spaced``, but the module
    neither defines nor imports it, so those calls would raise ``NameError``.
    This implementation is reconstructed from how the callers use the result
    (a possibly empty, human-readable string) — confirm against the intended
    helper.
    """
    return re.sub(r"[._\s]+", " ", text).strip(" -")


def clean_title(raw: str) -> str:
    """Reduce a raw name fragment to a title, dropping a year and what follows it.

    NOTE(review): a second ``clean_title`` defined further down this module
    rebinds the name, so this variant is not the one callers get. Decide which
    definition should remain.
    """
    text = trim_noise(raw)
    # Remove a year preceded by a separator, together with everything after it.
    text = re.sub(r"[ ._-]+\(?(?:19\d{2}|20\d{2})\)?.*$", "", text)
    text = YEAR_RE.sub(" ", text)
    text = EPISODE_RE.sub(" ", text)
    text = ALT_EPISODE_RE.sub(" ", text)
    text = SEASON_RE.sub(" ", text)
    return spaced(text) or "Unknown"
def strip_brackets(raw: str) -> str:
    """Replace every bracketed or parenthesised segment with a single space."""
    without_brackets = BRACKET_RE.sub(" ", raw)
    return without_brackets
def strip_release_tail(raw: str) -> str:
    """Remove bracketed segments, a trailing ``-GROUP`` suffix and known group tags."""
    without_tail = TRAILING_GROUP_RE.sub("", strip_brackets(raw))
    return spaced(RELEASE_GROUP_RE.sub(" ", without_tail))
def first_noise_index(text: str) -> int | None:
    """Return the offset of the earliest quality/audio/codec/edition/group tag, or None."""
    earliest = None
    for pattern in (QUALITY_RE, AUDIO_RE, CODEC_RE, EDITION_RE, RELEASE_GROUP_RE):
        found = pattern.search(text)
        if found is None:
            continue
        if earliest is None or found.start() < earliest:
            earliest = found.start()
    return earliest
def trim_noise(raw: str) -> str:
    """Strip the release tail, then cut the text at the first noise tag."""
    stripped = strip_release_tail(raw)
    cut = first_noise_index(stripped)
    return spaced(stripped if cut is None else stripped[:cut])
def clean_title(raw: str) -> str:
    """Reduce a raw name fragment to a title; ``"Unknown"`` when nothing is left.

    Noise tags are trimmed first, then year, episode and season markers are
    blanked out.
    """
    text = trim_noise(raw)
    for pattern in (YEAR_RE, EPISODE_RE, ALT_EPISODE_RE, SEASON_RE):
        text = pattern.sub(" ", text)
    return spaced(text) or "Unknown"
def clean_episode_title(raw: str) -> str:
    """Reduce the text after an episode marker to a title; ``"Episode"`` when empty."""
    without_year = YEAR_RE.sub(" ", trim_noise(raw))
    return spaced(without_year) or "Episode"
def parent_candidate(path: Path) -> str:
    """Return the name of the containing folder, looking past one subtitle folder.

    A parent named ``subs``, ``subtitles`` or ``sub`` (any case) is skipped in
    favour of its own parent. An empty string is returned when there is no
    usable folder name.
    """
    folder = path.parent
    if folder.name.lower() in {"subs", "subtitles", "sub"}:
        folder = folder.parent
    label = folder.name
    return "" if (not label or label in {".", "/"}) else label
def movie_title_source(path: Path, stem: str) -> str:
    """Choose between the parent folder name and the file stem as title source.

    Preference order: a folder containing a year, then a stem containing a
    year, then a folder with no noise tags and no episode marker, then the
    stem.
    """
    folder = parent_candidate(path)
    if YEAR_RE.search(folder):
        return folder
    if YEAR_RE.search(stem):
        return stem
    folder_is_clean = bool(folder) and first_noise_index(folder) is None and not EPISODE_RE.search(folder)
    return folder if folder_is_clean else stem
def parse_media(path: str) -> dict:
    """Parse a media path into title, year, quality and episode information.

    The type is ``"episode"`` when an ``SxxEyy`` or ``NxM`` marker is found in
    the stem, ``"season"`` when only a season marker is found, and ``"movie"``
    otherwise. Year and quality are taken from the stem, falling back to the
    parent folder name.
    """
    p = Path(path)
    stem = p.stem
    folder = parent_candidate(p)
    quality_match = QUALITY_RE.search(stem) or QUALITY_RE.search(folder)
    year_match = YEAR_RE.search(stem) or YEAR_RE.search(folder)

    season = None
    episode = None
    multi_episode = ""
    episode_title = ""

    # The SxxEyy form wins over NxM; the second-episode number lives in a
    # different group for each pattern.
    marker = EPISODE_RE.search(stem)
    second_episode_group = 3
    if marker is None:
        marker = ALT_EPISODE_RE.search(stem)
        second_episode_group = 4

    if marker is not None:
        media_type = "episode"
        season = int(marker.group(1))
        episode = int(marker.group(2))
        second = marker.group(second_episode_group)
        if second:
            multi_episode = f"-E{int(second):02d}"
        title = clean_title(stem[:marker.start()])
        episode_title = clean_episode_title(stem[marker.end():])
    else:
        season_match = SEASON_RE.search(stem)
        if season_match:
            media_type = "season"
            season = int(season_match.group(1))
            title = clean_title(stem[:season_match.start()] or folder or stem)
        else:
            media_type = "movie"
            title = clean_title(movie_title_source(p, stem))

    return {
        "source": str(p),
        "title": title,
        "year": int(year_match.group(1)) if year_match else None,
        "quality": f" - {quality_match.group(1).replace('.', ' ')}" if quality_match else "",
        "type": media_type,
        "season": season,
        "episode": episode,
        "multi_episode": multi_episode,
        "episode_title": episode_title,
        "extension": p.suffix.lower(),
    }

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
import json
import xml.etree.ElementTree as ET
from urllib.request import urlopen
def library_releases(library: dict | None) -> list[dict]:
releases = []
for show in ((library or {}).get("collections") or {}).get("series", []):
for season in show.get("seasons", []):
for episode in season.get("episodes", []):
if episode.get("status") not in {"missing", "upcoming"}:
continue
releases.append({
"provider": "Library",
"title": show.get("metadata", {}).get("title") or show.get("title"),
"episode_title": episode.get("title"),
"season": episode.get("season"),
"episode": episode.get("episode"),
"date": episode.get("air_date"),
"type": "tv",
"status": episode.get("status"),
"poster": show.get("metadata", {}).get("poster"),
"library_key": show.get("key"),
})
return sorted(releases, key=lambda item: (item.get("date") or "9999-99-99", item.get("title") or ""))
def fetch_releases(config: dict, library: dict | None = None) -> list[dict]:
    """Combine library-derived releases with entries from configured providers.

    Each enabled provider is fetched with an 8 second timeout. Providers of
    type ``"json"`` are read as a JSON list; all others are parsed as XML with
    ``item`` elements. At most 30 entries are taken per provider. A provider
    that fails contributes one ``{"provider", "error"}`` entry instead of
    raising.
    """
    collected: list[dict] = library_releases(library)
    for provider in config.get("release_providers", []):
        if not provider.get("enabled", True):
            continue
        try:
            with urlopen(provider["url"], timeout=8) as response:
                raw = response.read()
            if provider.get("type") == "json":
                parsed = json.loads(raw.decode())
                entries = parsed[:30] if isinstance(parsed, list) else []
                for entry in entries:
                    # Schedule-style entries nest the show; otherwise the
                    # entry itself is treated as the show.
                    show = entry.get("show", entry)
                    collected.append({
                        "provider": provider["name"],
                        "title": show.get("name"),
                        "date": entry.get("airdate") or entry.get("premiered"),
                        "type": "tv",
                    })
            else:
                # NOTE(review): the feed body comes from the network and is
                # parsed with the stdlib ElementTree parser; confirm the
                # configured providers are trusted.
                feed = ET.fromstring(raw)
                for node in feed.findall(".//item")[:30]:
                    collected.append({
                        "provider": provider["name"],
                        "title": (node.findtext("title") or "").strip(),
                        "date": (node.findtext("pubDate") or "").strip(),
                        "type": "movie",
                    })
        except Exception as exc:
            collected.append({"provider": provider.get("name"), "error": str(exc)})
    return collected

104
dist/sortarr/backend/sortarr/scanner.py vendored Normal file
View File

@@ -0,0 +1,104 @@
from __future__ import annotations
import logging
import threading
import time
from pathlib import Path
from .downloads import downloads_snapshot
from .organizer import execute_bundle_plan, plan_bundle
LOG = logging.getLogger(__name__)
class Scanner(threading.Thread):
    """Daemon thread that periodically plans and executes moves for settled downloads."""

    def __init__(self, config: dict, store):
        """Keep the config and store; no scanning happens until the thread runs."""
        super().__init__(daemon=True)
        self.config = config
        self.store = store
        self.stop_event = threading.Event()
        # Held for the duration of one scan pass so passes never overlap.
        self.scan_lock = threading.Lock()
        # Path string -> (size, whole-second mtime) seen on the previous check.
        self.seen_sizes: dict[str, tuple[int, int]] = {}

    def stop(self) -> None:
        """Ask the run loop to exit after the current pass or wait."""
        self.stop_event.set()

    def is_candidate(self, path: Path) -> bool:
        """Return True for an existing file with a media extension and no incomplete suffix."""
        app = self.config["app"]
        if not path.is_file():
            return False
        suffix = path.suffix.lower()
        if suffix in app.get("incomplete_suffixes", []):
            return False
        return suffix in set(app.get("media_extensions", []))

    def is_stable(self, path: Path) -> bool:
        """Return True when size and mtime match the previous check and the file has settled.

        A file that can no longer be stat'ed is reported as not stable and its
        remembered state is dropped.
        """
        key = str(path)
        try:
            stat = path.stat()
        except OSError:
            # The download may be renamed or removed between the candidate
            # check and this call; that must not abort the whole scan pass.
            self.seen_sizes.pop(key, None)
            return False
        current = (stat.st_size, int(stat.st_mtime))
        previous = self.seen_sizes.get(key)
        self.seen_sizes[key] = current
        age = time.time() - stat.st_mtime
        return previous == current and age >= int(self.config["app"].get("settle_seconds", 90))

    def scan_once(self) -> list[dict]:
        """Run one scan pass, or return the stored organizer queue if a pass is already running."""
        if not self.scan_lock.acquire(blocking=False):
            return self.store.snapshot().get("organizer", {}).get("queue", [])
        try:
            return self._scan_once()
        finally:
            self.scan_lock.release()

    def request_scan(self) -> bool:
        """Start a background scan pass; return False if one appears to be running."""
        if self.scan_lock.locked():
            return False
        thread = threading.Thread(target=self.scan_once, daemon=True)
        thread.start()
        return True

    def _scan_once(self) -> list[dict]:
        """Plan and execute every stable candidate bundle in the downloads snapshot."""
        downloads = Path(self.config["paths"]["downloads"])
        downloads.mkdir(parents=True, exist_ok=True)
        plans: list[dict] = []
        state = self.store.snapshot()
        previous_items = {item.get("source"): item for item in state.get("items", [])}
        snapshot = downloads_snapshot(self.config, state)
        # Metadata lookups are only allowed until this deadline; bundles planned
        # afterwards are planned with metadata disabled.
        metadata_budget = int(self.config["app"].get("organization_metadata_budget_seconds", 25))
        metadata_deadline = time.time() + metadata_budget
        for bundle in snapshot.get("bundles", []):
            path = Path(bundle["media"]["path"])
            if not self.is_candidate(path) or not self.is_stable(path):
                continue
            try:
                plan = plan_bundle(self.config, bundle, metadata_enabled=time.time() < metadata_deadline)
                result = execute_bundle_plan(self.config, plan)
                plans.append(result)
                # Publish the queue after every bundle, not only at the end.
                self.store.set_organizer_queue(plans)
                item = {
                    "source": str(path),
                    "destination": result.get("destination"),
                    "title": result["media"]["title"],
                    "type": result["media"]["type"],
                    "status": result.get("result") or result["status"],
                    "drive": result.get("drive"),
                    "confidence": result.get("confidence"),
                    "updated_at": time.time(),
                }
                self.store.upsert_item(item)
                previous = previous_items.get(str(path), {})
                # Only emit an event when something visible changed.
                if (
                    previous.get("destination") != item.get("destination")
                    or previous.get("status") != item.get("status")
                    or previous.get("confidence") != item.get("confidence")
                ):
                    self.store.add_event("info", f"{item['status']}: {path.name}", path=str(path), confidence=item.get("confidence"))
            except Exception as exc:
                LOG.exception("Failed to organize %s", path)
                self.store.add_event("error", str(exc), path=str(path))
        self.store.set_plans(plans)
        self.store.set_organizer_queue(plans)
        return plans

    def run(self) -> None:
        """Scan repeatedly until stopped, waiting ``scan_interval_seconds`` between passes."""
        while not self.stop_event.is_set():
            try:
                self.scan_once()
            except Exception:
                # A failed pass must not end the thread, or scanning would
                # stop for the rest of the process lifetime.
                LOG.exception("Scan pass failed; retrying after the next interval")
            interval = int(self.config["app"].get("scan_interval_seconds", 20))
            self.stop_event.wait(interval)

53
dist/sortarr/backend/sortarr/storage.py vendored Normal file
View File

@@ -0,0 +1,53 @@
from __future__ import annotations
import os
from pathlib import Path
def disk_usage(path: str) -> dict:
    """Return ``total``, ``used`` and ``free`` bytes for the filesystem at ``path``.

    ``free`` is computed from ``f_bavail`` and ``used`` is ``total - free``.
    """
    vfs = os.statvfs(path)
    block = vfs.f_frsize
    total, free = block * vfs.f_blocks, block * vfs.f_bavail
    return {"total": total, "used": total - free, "free": free}
def drive_stats(config: dict) -> list[dict]:
    """Return every configured drive merged with its current disk usage.

    Each drive path is created if it does not exist yet.
    """
    report = []
    for drive in config.get("drives", []):
        mount = Path(drive["path"])
        mount.mkdir(parents=True, exist_ok=True)
        report.append(drive | disk_usage(str(mount)))
    return report
def find_existing_home(config: dict, title: str) -> str | None:
normalized = title.lower()
for drive in config.get("drives", []):
root = Path(drive["path"])
for folder in ("Movies", "Shows"):
base = root / folder
if not base.exists():
continue
for child in base.iterdir():
if child.is_dir() and child.name.lower().startswith(normalized):
return str(root)
return None
def choose_drive(config: dict, title: str) -> dict:
    """Pick the drive that should receive ``title``.

    A drive that already holds a folder for the title is preferred and is
    returned without a free-space check. Otherwise the drive with the most
    free space among those meeting their ``min_free_gb`` is returned.

    Raises:
        RuntimeError: If no drive has its configured minimum free space.
    """
    existing = find_existing_home(config, title)
    if existing:
        existing_path = Path(existing)
        for drive in config.get("drives", []):
            # Compare as paths: the existing home is a normalised path string,
            # while the configured value may carry e.g. a trailing slash.
            if Path(drive["path"]) == existing_path:
                return drive
    candidates = []
    for drive in drive_stats(config):
        min_free = int(drive.get("min_free_gb", 0)) * 1024**3
        if drive["free"] >= min_free:
            candidates.append(drive)
    if not candidates:
        raise RuntimeError("No media drive has the configured minimum free space")
    return max(candidates, key=lambda d: d["free"])

73
dist/sortarr/backend/sortarr/store.py vendored Normal file
View File

@@ -0,0 +1,73 @@
from __future__ import annotations
import json
import threading
import time
from pathlib import Path
from typing import Any
class JsonStore:
    """Thread-safe application state persisted as ``state.json`` in a data directory."""

    def __init__(self, data_dir: str):
        """Set up the default state and merge in any previously saved state."""
        self.path = Path(data_dir) / "state.json"
        # Re-entrant because the mutators call ``save`` while holding the lock.
        self.lock = threading.RLock()
        self.state: dict[str, Any] = {
            "events": [],
            "items": [],
            "plans": [],
            "organizer": {"queue": [], "updated_at": None},
            "library": None,
            "settings": {},
            "updated_at": time.time(),
        }
        self.load()

    def load(self) -> None:
        """Merge the saved state file, if present, over the in-memory defaults."""
        with self.lock:
            if self.path.exists():
                loaded = json.loads(self.path.read_text(encoding="utf-8"))
                # Only a JSON object can be merged into the state mapping.
                if isinstance(loaded, dict):
                    self.state.update(loaded)

    def save(self) -> None:
        """Write the state to disk via a temporary file that is renamed into place."""
        with self.lock:
            self.state["updated_at"] = time.time()
            # The data directory may not exist yet on a first run.
            self.path.parent.mkdir(parents=True, exist_ok=True)
            tmp = self.path.with_suffix(".tmp")
            tmp.write_text(json.dumps(self.state, indent=2, sort_keys=True), encoding="utf-8")
            tmp.replace(self.path)

    def add_event(self, level: str, message: str, **fields: Any) -> None:
        """Prepend an event, keep the newest 500, and save."""
        with self.lock:
            event = {"time": time.time(), "level": level, "message": message, **fields}
            self.state.setdefault("events", []).insert(0, event)
            self.state["events"] = self.state["events"][:500]
            self.save()

    def upsert_item(self, item: dict[str, Any]) -> None:
        """Merge ``item`` into the entry with the same key, or append it, then save.

        The key is the item's ``destination`` when set, otherwise its ``source``.
        """
        with self.lock:
            items = self.state.setdefault("items", [])
            key = item.get("destination") or item.get("source")
            for idx, existing in enumerate(items):
                if (existing.get("destination") or existing.get("source")) == key:
                    items[idx] = {**existing, **item}
                    break
            else:
                items.append(item)
            self.save()

    def set_plans(self, plans: list[dict[str, Any]]) -> None:
        """Store the first 200 plans and save."""
        with self.lock:
            self.state["plans"] = plans[:200]
            self.save()

    def set_organizer_queue(self, queue: list[dict[str, Any]]) -> None:
        """Store the first 500 queue entries with a fresh timestamp and save."""
        with self.lock:
            self.state["organizer"] = {"queue": queue[:500], "updated_at": time.time()}
            self.save()

    def set_library(self, library: dict[str, Any]) -> None:
        """Replace the stored library snapshot and save."""
        with self.lock:
            self.state["library"] = library
            self.save()

    def snapshot(self) -> dict[str, Any]:
        """Return a deep copy of the state, made by a JSON round trip."""
        with self.lock:
            return json.loads(json.dumps(self.state))

98
dist/sortarr/backend/sortarr/tools.py vendored Normal file
View File

@@ -0,0 +1,98 @@
from __future__ import annotations
import shutil
import subprocess
import time
from pathlib import Path
def subtitle_audit(config: dict, library: dict | None) -> dict:
    """Summarise which library media files have, lack, or may lack subtitles.

    Only items whose lower-cased file suffix is listed in
    ``config["app"]["media_extensions"]`` are considered. An item counts as
    having subtitles when ``has_subtitles`` is exactly ``True``, as unknown
    when the key is absent, and as missing otherwise.

    Args:
        config: Application config; must contain an ``"app"`` section.
        library: Library snapshot with an ``"items"`` list, or ``None``.

    Returns:
        A report dict with the counts, up to 500 missing entries (each with
        the first three expected sidecar file names) and a ``generated_at``
        timestamp.
    """
    app_cfg = config["app"]
    video_suffixes = set(app_cfg.get("media_extensions", []))
    sidecar_suffixes = app_cfg.get("subtitle_extensions", [])
    entries = library.get("items", []) if library else []

    have = 0
    undetermined = 0
    lacking = []
    for entry in entries:
        media_path = Path(entry["path"])
        if media_path.suffix.lower() not in video_suffixes:
            continue
        if "has_subtitles" not in entry:
            # The scanner never recorded a verdict for this file.
            undetermined += 1
            continue
        if entry["has_subtitles"] is True:
            have += 1
            continue
        expected_names = [f"{media_path.stem}{ext}" for ext in sidecar_suffixes[:3]]
        lacking.append({
            "name": entry["name"],
            "path": str(media_path),
            "drive": entry.get("drive"),
            "expected": expected_names,
        })

    lacking_total = len(lacking)
    return {
        "checked": have + lacking_total + undetermined,
        "with_subtitles": have,
        "unknown_count": undetermined,
        "missing_count": lacking_total,
        "missing": lacking[:500],
        "generated_at": time.time(),
    }
def transcode_plan(config: dict, library: dict | None) -> dict:
    """Build the list of library media files that would be converted to MP4.

    Items already ending in ``.mp4`` (any case) are skipped. When
    ``config["app"]["media_extensions"]`` lists any extensions, items whose
    suffix is not in that list (subtitle sidecars, artwork, ...) are skipped
    too, mirroring the filter used by ``subtitle_audit``. With no configured
    list every non-MP4 item is planned.

    Args:
        config: Application config.
        library: Library snapshot with an ``"items"`` list, or ``None``.

    Returns:
        A dict with ``ffmpeg_available``, the total ``count`` of targets, the
        first 100 ``targets`` (each with its ffmpeg ``command`` as an argument
        list) and a ``generated_at`` timestamp.
    """
    media_extensions = set(config.get("app", {}).get("media_extensions", []))
    targets = []
    for item in (library or {}).get("items", []):
        path = Path(item["path"])
        suffix = path.suffix.lower()
        if suffix == ".mp4":
            continue
        if media_extensions and suffix not in media_extensions:
            # Not a media file: handing it to ffmpeg would only fail.
            continue
        # The output sits next to the source with the suffix swapped.
        # NOTE(review): "-y" overwrites an existing file at that path.
        output = path.with_suffix(".mp4")
        command = [
            "ffmpeg",
            "-hide_banner",
            "-y",
            "-i",
            str(path),
            "-map",
            "0",
            "-c:v",
            "libx264",
            "-preset",
            "veryfast",
            "-crf",
            "20",
            "-c:a",
            "aac",
            "-c:s",
            "mov_text",
            str(output),
        ]
        targets.append({
            "name": item["name"],
            "source": str(path),
            "output": str(output),
            "drive": item.get("drive"),
            "command": command,
        })
    return {
        "ffmpeg_available": shutil.which("ffmpeg") is not None,
        "count": len(targets),
        "targets": targets[:100],
        "generated_at": time.time(),
    }
def run_next_transcode(config: dict, library: dict | None) -> dict:
    """Run ffmpeg for the first target of the current transcode plan.

    Args:
        config: Application config; ``config["app"]["dry_run"]`` disables
            execution.
        library: Library snapshot passed through to ``transcode_plan``.

    Returns:
        The plan dict extended with ``status``, one of ``"empty"``,
        ``"ffmpeg-unavailable"``, ``"dry-run"``, ``"completed"`` or
        ``"failed"``. When ffmpeg was started the result also carries
        ``ran`` (the target), ``returncode`` (``None`` if ffmpeg was killed
        for exceeding the time limit) and the last 4000 characters of
        ``stderr``.
    """
    plan = transcode_plan(config, library)
    if not plan["targets"]:
        return {**plan, "status": "empty"}
    if not plan["ffmpeg_available"]:
        return {**plan, "status": "ffmpeg-unavailable"}
    if config["app"].get("dry_run"):
        return {**plan, "status": "dry-run"}

    # NOTE(review): the plan is rebuilt from `library` on every call and the
    # source file is not removed here, so the same first target is presumably
    # selected again unless the caller refreshes the library - confirm.
    target = plan["targets"][0]
    timeout_seconds = 60 * 60
    try:
        completed = subprocess.run(
            target["command"],
            # ffmpeg reads interactive commands from stdin; detach it so the
            # child cannot block on, or consume, the parent's input.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired as exc:
        # Report the timeout as a failed run instead of letting it propagate.
        captured = exc.stderr or ""
        if isinstance(captured, bytes):
            captured = captured.decode("utf-8", errors="replace")
        note = f"\nffmpeg timed out after {timeout_seconds} seconds"
        return {
            **plan,
            "status": "failed",
            "ran": target,
            "returncode": None,
            "stderr": (captured + note)[-4000:],
        }
    return {
        **plan,
        "status": "completed" if completed.returncode == 0 else "failed",
        "ran": target,
        "returncode": completed.returncode,
        "stderr": completed.stderr[-4000:],
    }