diff --git a/scripts/dispatcher.py b/scripts/dispatcher.py index 3a6d21d..a7195d5 100644 --- a/scripts/dispatcher.py +++ b/scripts/dispatcher.py @@ -184,7 +184,9 @@ def video_duration_s(worker: dict, worker_src: str) -> float: def do_extract(job: sqlite3.Row, worker: dict) -> str: videos = json.loads(job["video_paths"]) frames_dir = f"{worker['frames_dir']}/job_{job['id']}" - ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)}") + # Clean any frame_*.jpg from a prior run so count_frames reflects this extraction only + # (retries/restarts otherwise inflate frame_count with duplicates). + ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)} && rm -f {shlex.quote(frames_dir)}/frame_*.jpg") idx = 0 total_frames_est = 0 # will be computed after each scp for v in videos: @@ -243,7 +245,7 @@ def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str # .87 has 23 GB RAM, .84 has 62 GB. Keep effective frame count ~4k to stay safe. frame_count = job["frame_count"] or 0 ram_gb = 23 if worker["host"] == "192.168.0.87" else 62 - ram_budget_gb = ram_gb * 0.55 + ram_budget_gb = ram_gb * 0.45 # leave headroom for model + OS + cuda pinned buffers stride = 1 while frame_count * 3.15 / 1024 / stride > ram_budget_gb: stride += 1