from __future__ import annotations

import logging
import threading
import time
from pathlib import Path

from .downloads import downloads_snapshot
from .organizer import execute_bundle_plan, plan_bundle

LOG = logging.getLogger(__name__)


class Scanner(threading.Thread):
    """Daemon thread that periodically scans the downloads folder and organizes media.

    Each pass asks ``downloads_snapshot`` for the current bundles, keeps those
    whose media file passes ``is_candidate`` and ``is_stable``, plans and
    executes each one, and records the outcome in ``store``.

    Attributes:
        config: Configuration mapping; the ``"app"`` and ``"paths"`` sections
            are read here.
        store: State store. This class calls ``snapshot``, ``upsert_item``,
            ``add_event``, ``set_plans`` and ``set_organizer_queue`` on it.
        stop_event: Set by ``stop()`` to end the ``run`` loop.
        scan_lock: Held for the duration of a pass so passes never overlap.
        seen_sizes: Maps ``str(path)`` to ``(size, mtime_seconds, checks)``,
            where ``checks`` counts consecutive observations with the same
            size and mtime.
    """

    def __init__(self, config: dict, store):
        super().__init__(daemon=True)
        self.config = config
        self.store = store
        self.stop_event = threading.Event()
        self.scan_lock = threading.Lock()
        self.seen_sizes: dict[str, tuple[int, int, int]] = {}

    def stop(self) -> None:
        """Ask the ``run`` loop to exit; also cuts short its current interval wait."""
        self.stop_event.set()

    def is_candidate(self, path: Path) -> bool:
        """Return True when ``path`` is a regular file with a configured media extension.

        A file is rejected when its lower-cased suffix is listed in
        ``incomplete_suffixes`` or when its lower-cased name contains any
        non-empty entry of ``extra_keywords`` (compared case-insensitively).
        """
        app = self.config["app"]
        if not path.is_file():
            return False

        suffix = path.suffix.lower()
        if suffix in app.get("incomplete_suffixes", []):
            return False

        lowered = path.name.lower()
        # Empty keywords are skipped: "" is a substring of every name.
        if any(
            keyword and keyword.lower() in lowered
            for keyword in app.get("extra_keywords", [])
        ):
            return False

        return suffix in set(app.get("media_extensions", []))

    def is_stable(self, path: Path) -> bool:
        """Return True once ``path`` has stopped changing for long enough.

        Two conditions must both hold: the (size, whole-second mtime) pair has
        been identical for at least ``stable_checks`` consecutive calls
        (default 2, minimum 1), and the mtime is at least ``settle_seconds``
        old (default 90).

        A file that cannot be stat'ed is reported as not stable and its
        history is discarded, instead of raising and aborting the scan.
        """
        key = str(path)
        try:
            stat = path.stat()
        except OSError:
            # The file can disappear between is_candidate() and this call.
            self.seen_sizes.pop(key, None)
            return False

        signature = (stat.st_size, int(stat.st_mtime))
        previous = self.seen_sizes.get(key)
        # Extend the streak only if nothing changed since the last look.
        if previous and previous[:2] == signature:
            checks = previous[2] + 1
        else:
            checks = 1
        self.seen_sizes[key] = (*signature, checks)

        app = self.config["app"]
        required_checks = max(1, int(app.get("stable_checks", 2)))
        # Wall-clock time on purpose: st_mtime is a wall-clock timestamp.
        age = time.time() - stat.st_mtime
        return checks >= required_checks and age >= int(app.get("settle_seconds", 90))

    def scan_once(self) -> list[dict]:
        """Run one scan pass unless one is already in progress.

        Returns:
            The results of this pass, or, when another pass holds the lock,
            the ``organizer.queue`` list from the store snapshot (empty list
            if absent).
        """
        # Non-blocking acquire, so a plain ``with`` block is not usable here.
        if not self.scan_lock.acquire(blocking=False):
            return self.store.snapshot().get("organizer", {}).get("queue", [])
        try:
            return self._scan_once()
        finally:
            self.scan_lock.release()

    def request_scan(self) -> bool:
        """Start a scan pass on a background daemon thread.

        Returns:
            False when a pass already holds the lock, True once a thread has
            been started. The lock check is advisory; if another pass starts
            in between, the new thread's ``scan_once`` simply skips scanning.
        """
        if self.scan_lock.locked():
            return False
        thread = threading.Thread(target=self.scan_once, daemon=True)
        thread.start()
        return True

    def _scan_once(self) -> list[dict]:
        """Scan the downloads folder and organize every ready bundle.

        Callers are expected to hold ``scan_lock`` (``scan_once`` does).

        Returns:
            The list of ``execute_bundle_plan`` results for bundles processed
            in this pass.
        """
        downloads = Path(self.config["paths"]["downloads"])
        downloads.mkdir(parents=True, exist_ok=True)

        state = self.store.snapshot()
        previous_items = {item.get("source"): item for item in state.get("items", [])}
        snapshot = downloads_snapshot(self.config, state)

        metadata_budget = int(
            self.config["app"].get("organization_metadata_budget_seconds", 25)
        )
        # Monotonic clock: the budget is a duration and must not be affected
        # by wall-clock adjustments.
        metadata_deadline = time.monotonic() + metadata_budget

        plans: list[dict] = []
        live_paths: set[str] = set()
        for bundle in snapshot.get("bundles", []):
            path = Path(bundle["media"]["path"])
            live_paths.add(str(path))
            if not self.is_candidate(path) or not self.is_stable(path):
                continue
            try:
                # Once the budget is spent, planning continues with
                # metadata_enabled=False for the remaining bundles.
                plan = plan_bundle(
                    self.config,
                    bundle,
                    metadata_enabled=time.monotonic() < metadata_deadline,
                )
                result = execute_bundle_plan(self.config, plan)
                plans.append(result)
                # Publish after every bundle so the queue reflects progress.
                self.store.set_organizer_queue(plans)

                item = {
                    "source": str(path),
                    "destination": result.get("destination"),
                    "title": result["media"]["title"],
                    "type": result["media"]["type"],
                    "status": result.get("result") or result["status"],
                    "drive": result.get("drive"),
                    "confidence": result.get("confidence"),
                    "updated_at": time.time(),
                }
                self.store.upsert_item(item)

                # Emit an event only when something observable changed.
                previous = previous_items.get(str(path), {})
                if any(
                    previous.get(field) != item.get(field)
                    for field in ("destination", "status", "confidence")
                ):
                    self.store.add_event(
                        "info",
                        f"{item['status']}: {path.name}",
                        path=str(path),
                        confidence=item.get("confidence"),
                    )
            except Exception as exc:
                # One bad bundle must not stop the rest of the pass.
                LOG.exception("Failed to organize %s", path)
                self.store.add_event("error", str(exc), path=str(path))

        # Drop stability history for files no longer in the snapshot so the
        # mapping does not grow for the lifetime of the process.
        for stale in set(self.seen_sizes) - live_paths:
            self.seen_sizes.pop(stale, None)

        self.store.set_plans(plans)
        self.store.set_organizer_queue(plans)
        return plans

    def run(self) -> None:
        """Scan repeatedly until ``stop()`` is called.

        Waits ``scan_interval_seconds`` (default 20) between passes. A pass
        that raises is logged and retried after the interval rather than
        terminating the thread.
        """
        while not self.stop_event.is_set():
            try:
                self.scan_once()
            except Exception:
                # Without this, one failure would end scanning permanently.
                LOG.exception("Scan pass failed; retrying after the scan interval")
            interval = int(self.config["app"].get("scan_interval_seconds", 20))
            self.stop_event.wait(interval)