From cfbb542992290a69c9b3860fb5525836f5d39367 Mon Sep 17 00:00:00 2001 From: Flag Date: Wed, 22 Apr 2026 19:25:56 +0000 Subject: [PATCH] =?UTF-8?q?dispatcher=20=E2=80=94=20clean=20frames=20avant=20?= =?UTF-8?q?extract=20+=20budget=20RAM=200.55=20->=200.45?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bugs découverts en live : 1. Les retries/restarts ne cleanaient pas frames_dir -> ffmpeg re-extrayait par-dessus les anciennes frames -> frame_count gonflé (ex : 21991 au lieu de 11000) -> budget stride faussé -> OOM. 2. Le budget 0.55*RAM ne laissait pas assez de headroom (OS + CUDA pinned buffers + autres processus) -> kill -9 à la limite. 0.45 est plus conservateur. --- scripts/dispatcher.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/dispatcher.py b/scripts/dispatcher.py index 3a6d21d..a7195d5 100644 --- a/scripts/dispatcher.py +++ b/scripts/dispatcher.py @@ -184,7 +184,9 @@ def video_duration_s(worker: dict, worker_src: str) -> float: def do_extract(job: sqlite3.Row, worker: dict) -> str: videos = json.loads(job["video_paths"]) frames_dir = f"{worker['frames_dir']}/job_{job['id']}" - ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)}") + # Clean any frame_*.jpg from a prior run so count_frames reflects this extraction only + # (retries/restarts otherwise inflate frame_count with duplicates). + ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)} && rm -f {shlex.quote(frames_dir)}/frame_*.jpg") idx = 0 total_frames_est = 0 # will be computed after each scp for v in videos: @@ -243,7 +245,7 @@ def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str # .87 has 23 GB RAM, .84 has 62 GB. Keep effective frame count ~4k to stay safe. 
frame_count = job["frame_count"] or 0 ram_gb = 23 if worker["host"] == "192.168.0.87" else 62 - ram_budget_gb = ram_gb * 0.55 + ram_budget_gb = ram_gb * 0.45 # leave headroom for model + OS + cuda pinned buffers stride = 1 while frame_count * 3.15 / 1024 / stride > ram_budget_gb: stride += 1