dispatcher: window_size adaptatif + skip des vidéos courtes (< 8 min)

window_size/overlap_size selon la longueur effective (frame_count // stride) : <= 320 -> 16/2 (petit overhead) ; <= 3000 -> 32/8 ; > 3000 -> 64/16 (moins de fenêtres sur les longues séquences). Skip automatique si video_duration_s < COSMA_QC_MIN_VIDEO_S (défaut 480 s = 8 min) : les segments trop courts ne contiennent pas de plongée exploitable. Reco utilisateur : les vidéos < 8 min sont inutiles pour la reconstruction.
2026-04-22 21:34:51 +00:00
parent 9dd6a82d08
commit 033abc41c5
1 changed files with 16 additions and 1 deletions
--- a/scripts/dispatcher.py
+++ b/scripts/dispatcher.py
@@ -349,6 +349,12 @@ def do_extract(job: sqlite3.Row, worker: dict) -> str:
    # Persist the measured video duration so the dashboard shows real length (segment_label
    # from ingest is only the timestamp of the first MP4 and lies when a segment spans multiple).
    set_status(job["id"], video_duration_s=total_duration_s)
    # Skip segments that are too short to contain a meaningful dive.
    min_video_s = int(os.environ.get("COSMA_QC_MIN_VIDEO_S", "480"))  # 8 min default
    if total_duration_s < min_video_s:
        print(f"  ↳ job #{job['id']}: video too short ({int(total_duration_s)}s < {min_video_s}s) — marking skipped")
        set_status(job["id"], status="skipped", error=f"too short: {int(total_duration_s)}s of video")
        raise RuntimeError("skipped_short")
    # Drop the hors-eau prefix AND suffix before reconstruction — AUV is out-of-water at both ends.
    head, tail, remaining = trim_above_water_prefix(worker, frames_dir)
    if head or tail:
@@ -382,12 +388,21 @@ def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str
    # demo.py starts a viser web server after saving the PLY and never exits.
    # Wrap it: launch in bg, wait for "PLY saved" marker in the log, kill, exit 0.
    # Match on the unique job frames_dir to identify our demo.py among all children/threads.
    # Adapt window size to sequence length (lingbot-map README recommendation): bigger windows
    # reduce overhead on long sequences. Effective frame count = frame_count / stride.
    eff = frame_count // max(1, stride) if frame_count else 0
    if eff > 3000:
        window_size, overlap_size = 64, 16
    elif eff > 320:
        window_size, overlap_size = 32, 8
    else:
        window_size, overlap_size = 16, 2
    marker = shlex.quote(frames_dir)
    cmd = (
        f"cd {shlex.quote(worker['lingbot_path'])} && source .venv/bin/activate && "
        f"setsid python3 demo.py --model_path {shlex.quote(ckpt)} "
        f"--image_folder {shlex.quote(frames_dir)} --port {port} "
-        f"--stride {stride} --use_sdpa --mode windowed --window_size 16 --overlap_size 2 --offload_to_cpu "
+        f"--stride {stride} --use_sdpa --mode windowed --window_size {window_size} --overlap_size {overlap_size} --offload_to_cpu "
        f"--save_ply {shlex.quote(ply_path)} > {log} 2>&1 & "
        f"DEMO_PID=$!; "
        f"for i in $(seq 1 3600); do "