Files
Sortarr/backend/sortarr/organizer.py
2026-05-15 02:41:52 +00:00

321 lines
12 KiB
Python

from __future__ import annotations
import logging
import os
import shutil
import time
import hashlib
import xml.etree.ElementTree as ET
from pathlib import Path
from .metadata import movie_metadata, series_metadata, tmdb_available
from .parser import parse_media
from .storage import choose_drive
LOG = logging.getLogger(__name__)

# Maps a lowercase token found in a subtitle file name to the three-letter
# language code appended to the subtitle's destination name.
# Every language is reachable through its code and its full English name.
# NOTE: tokens are matched as whole words of the file stem, so adding short
# codes such as "it" or "de" would also match those words inside titles.
LANGUAGE_HINTS = {
    "eng": "eng",
    "english": "eng",
    "en": "eng",
    "spa": "spa",
    "spanish": "spa",
    "fre": "fre",
    "french": "fre",
    "ger": "ger",
    "german": "ger",
    "ita": "ita",
    "italian": "ita",
    "jpn": "jpn",
    "japanese": "jpn",
    "kor": "kor",
    "korean": "kor",
}
def safe_name(value: str) -> str:
    r"""Return *value* reduced to text that is safe to use as a file name.

    The characters ``< > : " / \ | ? *`` are removed, surrounding whitespace
    is trimmed, and trailing dots are dropped together with any whitespace
    that precedes them, so the result never ends in a dot or a space.

    Returns:
        The cleaned name, or ``"Unknown"`` when nothing is left.
    """
    forbidden = '<>:"/\\|?*'
    cleaned = "".join(ch for ch in value if ch not in forbidden).strip()
    # Removing a trailing dot can expose whitespace (and vice versa), so keep
    # trimming until the name ends in a regular character.
    while cleaned.endswith("."):
        cleaned = cleaned.rstrip(".").rstrip()
    return cleaned or "Unknown"
def format_destination(config: dict, media: dict, drive: dict) -> Path:
    """Build the library path for *media* on *drive*.

    The folder and file names are ``str.format`` templates taken from
    ``config["library"]``: episodes use ``series_folder`` / ``episode_file``,
    seasons use ``series_folder`` with a built-in season file template, and
    everything else uses ``movie_file`` with ``movie_folder`` (or
    ``unknown_folder`` when the media has no year).

    Returns:
        ``drive["path"]`` joined with the rendered folder and file name.
    """
    library = config["library"]
    clean_title = safe_name(media["title"])
    year_label = media.get("year") or "Unknown Year"

    kind = media["type"]
    if kind == "episode":
        folder_template, file_template = library["series_folder"], library["episode_file"]
    elif kind == "season":
        folder_template = library["series_folder"]
        file_template = "{title} - Season {season:02d}{quality}{ext}"
    else:
        folder_key = "movie_folder" if media.get("year") else "unknown_folder"
        folder_template = library[folder_key]
        file_template = library["movie_file"]

    # Start from the parsed media fields and override the ones that need
    # sanitising or a fallback before they are substituted into templates.
    fields = dict(media)
    fields.update(
        title=clean_title,
        year=year_label,
        season=media.get("season") or 1,
        episode=media.get("episode") or 1,
        episode_title=safe_name(media.get("episode_title") or "Episode"),
        ext=media["extension"],
    )

    folder_name = folder_template.format(**fields)
    file_name = file_template.format(**fields)
    return Path(drive["path"]) / folder_name / file_name
def ensure_directory(path: Path, config: dict) -> None:
    """Create *path* and apply the configured directory mode to it.

    The mode comes from ``config["library"]["directory_mode"]`` (octal string,
    default ``"775"``). Nothing is chmod-ed when *path* lies inside the
    configured downloads directory.

    When one of the configured drive roots is an ancestor of *path* (or is
    *path* itself), the mode is applied to *path* and every directory above
    it up to and including that drive root. When no drive root contains
    *path*, only *path* itself is changed, so unrelated system directories
    are never touched. The filesystem root is never chmod-ed.

    Failures to chmod are logged at debug level and otherwise ignored.
    """
    path.mkdir(parents=True, exist_ok=True)
    mode = int(str(config["library"].get("directory_mode", "775")), 8)

    downloads = Path(config["paths"].get("downloads", "/downloads"))
    try:
        path.relative_to(downloads)
    except ValueError:
        pass
    else:
        # Directories inside the downloads tree keep their existing mode.
        return

    drive_roots = {Path(drive["path"]) for drive in config.get("drives", [])}
    # Leaf first, then each ancestor; the filesystem root (its own parent)
    # is excluded.
    lineage = [d for d in (path, *path.parents) if d != d.parent]

    targets = lineage[:1]
    for depth, directory in enumerate(lineage):
        if directory in drive_roots:
            targets = lineage[: depth + 1]
            break

    for directory in targets:
        try:
            os.chmod(directory, mode)
        except OSError as exc:
            # Best effort: a directory we do not own must not abort the move.
            LOG.debug("Could not chmod %s: %s", directory, exc)
def language_suffix(path: Path) -> str:
    """Return a ``.<code>`` language suffix detected in *path*'s file stem.

    The stem is lower-cased and split into words on dots, underscores and
    whitespace. The first entry of ``LANGUAGE_HINTS`` (in table order) whose
    token appears among those words decides the code. An empty string is
    returned when no token matches.
    """
    separators = str.maketrans("._", "  ")
    words = path.stem.lower().translate(separators).split()
    matches = (f".{code}" for token, code in LANGUAGE_HINTS.items() if token in words)
    return next(matches, "")
def unique_planned_path(path: Path, rule: str, reserved: set[str]) -> Path | None:
    """Resolve *path* against the collision *rule* and earlier reservations.

    ``collision_path`` is consulted first; ``None`` from it is passed through.
    If the resulting path has not been reserved yet it is added to *reserved*
    and returned. Otherwise ``.2``, ``.3``, ... (up to ``.999``) is inserted
    before the extension until a name is found that neither exists on disk
    nor is reserved; that name is reserved and returned.

    Raises:
        RuntimeError: if every numbered alternative is taken.
    """
    base = collision_path(path, rule)
    if not base:
        return None

    base_key = str(base)
    if base_key not in reserved:
        reserved.add(base_key)
        return base

    for number in range(2, 1000):
        alternative = base.with_name(f"{base.stem}.{number}{base.suffix}")
        if alternative.exists() or str(alternative) in reserved:
            continue
        reserved.add(str(alternative))
        return alternative
    raise RuntimeError(f"Could not find collision-free name for {path}")
def tmdb_episode_title(metadata: dict, season: int | None, episode: int | None) -> str | None:
    """Look up an episode title in *metadata*.

    Episodes are read from ``metadata["seasons"][str(season)]["episodes"]``.
    The first entry whose ``"episode"`` equals *episode* and whose
    ``"title"`` is non-empty wins.

    Returns:
        The title, or ``None`` when *season* or *episode* is falsy or no
        matching titled entry exists.
    """
    if not (season and episode):
        return None
    episodes = metadata.get("seasons", {}).get(str(season), {}).get("episodes", [])
    titles = (
        entry["title"]
        for entry in episodes
        if entry.get("episode") == episode and entry.get("title")
    )
    return next(titles, None)
def plan_id(source: str) -> str:
    """Return a stable 16-hex-character identifier derived from *source*.

    The identifier is the first 16 characters of the SHA-256 hex digest of
    the UTF-8 encoded string. Lone surrogate code points (which is how
    undecodable file-name bytes show up in a ``str`` path) are encoded with
    ``surrogatepass`` instead of raising ``UnicodeEncodeError``; for every
    other string the result is the plain UTF-8 digest.
    """
    encoded = source.encode("utf-8", errors="surrogatepass")
    return hashlib.sha256(encoded).hexdigest()[:16]
def quality_score(media: dict) -> int:
    """Rank the resolution named in ``media["quality"]``.

    Returns:
        4 for 2160, 3 for 1080, 2 for 720, 1 for 480 and 0 when the quality
        is missing, empty, ``None`` or names none of those resolutions. The
        match is a case-insensitive substring test, checked from the highest
        resolution down.
    """
    # `or ""` covers a key that is present but holds None.
    quality = str(media.get("quality") or "").lower()
    ranking = ((4, "2160"), (3, "1080"), (2, "720"), (1, "480"))
    for rank, marker in ranking:
        if marker in quality:
            return rank
    return 0
def confidence(config: dict, media: dict, metadata_enabled: bool = True) -> tuple[int, list[str], dict]:
    """Score how trustworthy the parsed *media* is.

    Scoring starts at 20 and adds 20 for a usable title, 35 for an episode
    with season and episode numbers, 25 for a movie with a year, 5 for a
    parsed quality and 20 for a TMDb match; the total is capped at 100.

    TMDb is only queried when *metadata_enabled* is true and
    ``tmdb_available(config)`` reports it usable. When lookups are disabled
    but TMDb is available, the reason ``"metadata deferred"`` is recorded.

    Returns:
        ``(score, reasons, metadata)`` where *metadata* is the lookup result
        or a filename-derived placeholder.
    """
    title = media["title"]
    kind = media["type"]
    metadata = {"source": "filename", "title": title}

    checks = (
        (title != "Unknown" and len(title) > 2, 20, "title parsed"),
        (kind == "episode" and media.get("season") and media.get("episode"), 35, "season and episode parsed"),
        (kind == "movie" and media.get("year"), 25, "year parsed"),
        (media.get("quality"), 5, "quality parsed"),
    )
    total = 20
    reasons = []
    for passed, points, reason in checks:
        if passed:
            total += points
            reasons.append(reason)

    if metadata_enabled and tmdb_available(config):
        if kind == "movie":
            metadata = movie_metadata(config, title, media.get("year"))
        elif kind == "episode":
            wanted_seasons = {media.get("season") or 1}
            metadata = series_metadata(config, title, wanted_seasons)
        if metadata.get("source") == "tmdb":
            total += 20
            reasons.append("TMDb match")
    elif tmdb_available(config):
        reasons.append("metadata deferred")

    return min(total, 100), reasons, metadata
def plan_bundle(config: dict, bundle: dict, metadata_enabled: bool = True) -> dict:
    """Plan the move of a media file together with its subtitles.

    The media file at ``bundle["media"]["path"]`` is parsed, scored with
    ``confidence`` and assigned a drive. A TMDb match overrides the parsed
    title and fills in a missing movie year or the episode title. The
    destination is resolved through the configured collision rule
    (``config["library"]["collision"]``, default ``"keep-both"``), and each
    subtitle gets a destination next to it, named from the ``subtitle_file``
    template with a language suffix (``".und"`` when none is detected).

    The status is ``"skipped"`` when no destination could be chosen,
    ``"ready"`` at or above ``auto_move_min_confidence`` (default 90),
    ``"needs-review"`` at or above ``review_min_confidence`` (default 60)
    and ``"low-confidence"`` otherwise.

    Returns:
        The plan dictionary describing source, destination, media, metadata,
        drive id, confidence, reasons, status, subtitles and sidecars.
    """
    source_path = Path(bundle["media"]["path"])
    media = parse_media(str(source_path))
    score, reasons, metadata = confidence(config, media, metadata_enabled)
    drive = choose_drive(config, metadata.get("title") or media["title"])

    # NOTE(review): the nesting of the three updates below under the TMDb
    # check is reconstructed from context — confirm against the repository.
    if metadata.get("source") == "tmdb":
        media["title"] = metadata.get("title") or media["title"]
        if media["type"] == "movie" and metadata.get("release_date") and not media.get("year"):
            release_year = metadata["release_date"][:4]
            media["year"] = int(release_year)
        if media["type"] == "episode":
            looked_up = tmdb_episode_title(metadata, media.get("season"), media.get("episode"))
            media["episode_title"] = looked_up or media.get("episode_title") or "Episode"

    planned = format_destination(config, media, drive)
    collision_rule = config["library"].get("collision", "keep-both")
    final = collision_path(planned, collision_rule)

    subtitle_moves = []
    if final:
        # The media destination is reserved so no subtitle can claim it.
        reserved = {str(final)}
        for subtitle in bundle.get("subtitles", []):
            subtitle_source = Path(subtitle["path"])
            language = language_suffix(subtitle_source) or ".und"
            extension = subtitle_source.suffix.lower()
            template = config["library"].get("subtitle_file", "{basename}{language}{ext}")
            rendered = template.format(basename=final.stem, language=language, ext=extension)
            wanted = final.with_name(safe_name(Path(rendered).stem) + extension)
            resolved = unique_planned_path(wanted, collision_rule, reserved)
            subtitle_moves.append(
                {
                    "source": str(subtitle_source),
                    "destination": str(resolved) if resolved else None,
                    "language": language.lstrip(".") or None,
                }
            )

    auto_threshold = int(config["app"].get("auto_move_min_confidence", 90))
    review_threshold = int(config["app"].get("review_min_confidence", 60))
    if not final:
        status = "skipped"
    elif score >= auto_threshold:
        status = "ready"
    elif score >= review_threshold:
        status = "needs-review"
    else:
        status = "low-confidence"

    plan = {
        "id": plan_id(str(source_path)),
        "source": str(source_path),
        "destination": str(final) if final else None,
        "media": media,
        "metadata": metadata,
        "drive": drive["id"],
        "confidence": score,
        "reasons": reasons,
        "status": status,
        "subtitles": subtitle_moves,
        "sidecars": bundle.get("sidecars", []),
        "updated_at": time.time(),
    }
    return plan
def collision_path(path: Path, rule: str) -> Path | None:
    """Decide where a file headed for *path* should land.

    Returns:
        *path* itself when nothing exists there or *rule* is ``"replace"``;
        ``None`` when *rule* is ``"skip"`` and the path is taken; otherwise
        the first non-existing sibling named ``"<stem> (N)<suffix>"`` for N
        from 2 to 999.

    Raises:
        RuntimeError: if all numbered siblings already exist.
    """
    if not path.exists() or rule == "replace":
        return path
    if rule == "skip":
        return None

    numbered = (
        path.with_name(f"{path.stem} ({number}){path.suffix}")
        for number in range(2, 1000)
    )
    available = next((option for option in numbered if not option.exists()), None)
    if available is None:
        raise RuntimeError(f"Could not find collision-free name for {path}")
    return available
def write_nfo(path: Path, media: dict) -> None:
    """Write an ``.nfo`` XML sidecar next to *path*.

    The root element is ``<movie>`` for movies and ``<episodedetails>`` for
    anything else. It always holds ``<title>`` and additionally ``<year>``,
    ``<season>`` and ``<episode>`` for the fields that are set in *media*.
    The document is indented, has no XML declaration, ends with a newline
    and is written as UTF-8 regardless of the process locale.
    """
    root = ET.Element("movie" if media["type"] == "movie" else "episodedetails")
    ET.SubElement(root, "title").text = str(media["title"])
    for field in ("year", "season", "episode"):
        if media.get(field):
            ET.SubElement(root, field).text = str(media[field])
    ET.indent(root, space=" ")
    # Serialise in memory and write once, with an explicit encoding, so the
    # file is not written, read back and rewritten just to append a newline.
    document = ET.tostring(root, encoding="unicode")
    path.with_suffix(".nfo").write_text(document + "\n", encoding="utf-8")
def plan_file(config: dict, source: Path) -> dict:
    """Plan the move of a single media file.

    The file name is parsed, a drive is chosen by title, and the formatted
    destination is resolved through the configured collision rule
    (``config["library"]["collision"]``, default ``"keep-both"``).

    Returns:
        A plan with ``source``, ``destination``, ``media``, ``drive`` and an
        ``action`` of ``"skip"`` (no destination), ``"dry-run"`` (when
        ``config["app"]["dry_run"]`` is set) or ``"move"``.
    """
    media = parse_media(str(source))
    drive = choose_drive(config, media["title"])
    wanted = format_destination(config, media, drive)
    collision_rule = config["library"].get("collision", "keep-both")
    final = collision_path(wanted, collision_rule)

    if final is None:
        action = "skip"
    elif config["app"].get("dry_run"):
        action = "dry-run"
    else:
        action = "move"

    return {
        "source": str(source),
        "destination": str(final) if final else None,
        "media": media,
        "drive": drive["id"],
        "action": action,
    }
def execute_plan(config: dict, plan: dict) -> dict:
    """Carry out a plan produced by ``plan_file``.

    Plans without a destination or with action ``"skip"`` come back with
    status ``"skipped"``; with ``config["app"]["dry_run"]`` set the plan
    comes back with status ``"planned"`` and nothing is touched.

    Otherwise the source is moved to a ``.sorting`` staging file beside the
    destination and then renamed onto it, the file mode is set from
    ``config["library"]["permissions_mode"]`` (octal string, default
    ``"664"``), and an NFO file is written unless
    ``config["metadata"]["write_nfo"]`` is false.

    Returns:
        A copy of *plan* with ``status`` (and ``completed_at`` after a move).
    """
    if not plan.get("destination") or plan["action"] == "skip":
        return dict(plan, status="skipped")

    source = Path(plan["source"])
    destination = Path(plan["destination"])
    if config["app"].get("dry_run"):
        return dict(plan, status="planned")

    ensure_directory(destination.parent, config)

    # Stage under a temporary name so the final name only ever appears once
    # the whole file has arrived.
    staging = destination.with_suffix(destination.suffix + ".sorting")
    if staging.exists():
        staging.unlink()
    shutil.move(str(source), str(staging))
    staging.replace(destination)

    file_mode = int(str(config["library"].get("permissions_mode", "664")), 8)
    os.chmod(destination, file_mode)

    metadata_options = config.get("metadata", {})
    if metadata_options.get("write_nfo", True):
        write_nfo(destination, plan["media"])

    LOG.info("Moved %s to %s", source, destination)
    return dict(plan, status="moved", completed_at=time.time())
def execute_bundle_plan(config: dict, plan: dict, force: bool = False) -> dict:
    """Carry out a plan produced by ``plan_bundle``.

    The plan is returned with result ``"held"`` when it has no destination,
    or when its status is ``"skipped"``, ``"low-confidence"`` or
    ``"needs-review"`` and *force* is false. With
    ``config["app"]["dry_run"]`` set the result is ``"dry-run"``.

    Otherwise the media file is moved via a ``.sorting`` staging file and
    given the mode from ``config["library"]["permissions_mode"]`` (octal
    string, default ``"664"``). Each subtitle whose source still exists and
    that has a destination is moved and given the same mode, and an NFO file
    is written unless ``config["metadata"]["write_nfo"]`` is false.

    Returns:
        A copy of *plan* with ``result`` (plus ``status`` and
        ``completed_at`` after a move).
    """
    needs_force = {"skipped", "low-confidence", "needs-review"}
    if not plan.get("destination") or (plan["status"] in needs_force and not force):
        return dict(plan, result="held")
    if config["app"].get("dry_run"):
        return dict(plan, result="dry-run")

    source = Path(plan["source"])
    destination = Path(plan["destination"])
    ensure_directory(destination.parent, config)

    # Stage under a temporary name, then rename onto the final name.
    staging = destination.with_suffix(destination.suffix + ".sorting")
    if staging.exists():
        staging.unlink()
    shutil.move(str(source), str(staging))
    staging.replace(destination)

    file_mode = int(str(config["library"].get("permissions_mode", "664")), 8)
    os.chmod(destination, file_mode)

    for entry in plan.get("subtitles", []):
        subtitle_source = Path(entry["source"])
        if subtitle_source.exists() and entry.get("destination"):
            subtitle_target = Path(entry["destination"])
            ensure_directory(subtitle_target.parent, config)
            shutil.move(str(subtitle_source), str(subtitle_target))
            os.chmod(subtitle_target, file_mode)

    metadata_options = config.get("metadata", {})
    if metadata_options.get("write_nfo", True):
        write_nfo(destination, plan["media"])

    return dict(plan, status="moved", result="moved", completed_at=time.time())