From 033abc41c58f735a322406097d67960186704006 Mon Sep 17 00:00:00 2001 From: Flag Date: Wed, 22 Apr 2026 21:34:51 +0000 Subject: [PATCH] =?UTF-8?q?dispatcher=20=E2=80=94=20window=5Fsize=20adaptatif?= =?UTF-8?q?=20+=20skip=20video=20courtes=20<8min?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit window_size selon len effectif (frame_count / stride): <= 320 -> 16/2 (petit overhead) <= 3000 -> 32/8 > 3000 -> 64/16 (moins de windows sur longues sequences) Skip auto si video_duration_s < COSMA_QC_MIN_VIDEO_S (default 480s = 8min): segments trop courts ne contiennent pas de plongee exploitable. Reco user: videos < 8 min sont inutiles pour la reconstruction. --- scripts/dispatcher.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/scripts/dispatcher.py b/scripts/dispatcher.py index f436943..6f9f516 100644 --- a/scripts/dispatcher.py +++ b/scripts/dispatcher.py @@ -349,6 +349,12 @@ def do_extract(job: sqlite3.Row, worker: dict) -> str: # Persist the measured video duration so the dashboard shows real length (segment_label # from ingest is only the timestamp of the first MP4 and lies when a segment spans multiple). set_status(job["id"], video_duration_s=total_duration_s) + # Skip segments that are too short to contain a meaningful dive. + min_video_s = int(os.environ.get("COSMA_QC_MIN_VIDEO_S", "480")) # 8 min default + if total_duration_s < min_video_s: + print(f" ↳ job #{job['id']}: video too short ({int(total_duration_s)}s < {min_video_s}s) — marking skipped") + set_status(job["id"], status="skipped", error=f"too short: {int(total_duration_s)}s of video") + raise RuntimeError("skipped_short") # Drop the hors-eau prefix AND suffix before reconstruction — AUV is out-of-water at both ends. 
head, tail, remaining = trim_above_water_prefix(worker, frames_dir) if head or tail: @@ -382,12 +388,21 @@ def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str # demo.py starts a viser web server after saving the PLY and never exits. # Wrap it: launch in bg, wait for "PLY saved" marker in the log, kill, exit 0. # Match on the unique job frames_dir to identify our demo.py among all children/threads. + # Adapt window size to sequence length (lingbot-map README recommendation): bigger windows + # reduce overhead on long sequences. Effective frame count = frame_count / stride. + eff = frame_count // max(1, stride) if frame_count else 0 + if eff > 3000: + window_size, overlap_size = 64, 16 + elif eff > 320: + window_size, overlap_size = 32, 8 + else: + window_size, overlap_size = 16, 2 marker = shlex.quote(frames_dir) cmd = ( f"cd {shlex.quote(worker['lingbot_path'])} && source .venv/bin/activate && " f"setsid python3 demo.py --model_path {shlex.quote(ckpt)} " f"--image_folder {shlex.quote(frames_dir)} --port {port} " - f"--stride {stride} --use_sdpa --mode windowed --window_size 16 --overlap_size 2 --offload_to_cpu " + f"--stride {stride} --use_sdpa --mode windowed --window_size {window_size} --overlap_size {overlap_size} --offload_to_cpu " f"--save_ply {shlex.quote(ply_path)} > {log} 2>&1 & " f"DEMO_PID=$!; " f"for i in $(seq 1 3600); do "