stage03b: trim videos per run + ours rough cut

LRV proxy (GoPro low-res 768x432 H.264) + ffmpeg -c copy keyframe-aligned. Inputs: 02_runs.json + 03_video_index.json. Outputs: per-run mp4 + ours_<gp>.mp4 chrono concat. Tested on 20260505-Lepradet: 5 files + 2 ours (~11 GB total). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
stage01: filtre old/, fusion AUV0xx<->AUV2xx, focus date mission
2026-05-16 16:05:41 +00:00 · 2026-05-15 10:48:22 +00:00 · 2026-05-15 10:10:01 +00:00 · 2026-05-15 09:53:01 +00:00 · 2026-05-15 09:53:01 +00:00 · 2026-05-15 09:53:01 +00:00
51 changed files with 8040 additions and 1 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,5 @@
+
+# pipeline
+__pycache__/
+*.pyc
+*.db
--- a/app/main.py
+++ b/app/main.py
@@ -52,6 +52,7 @@ from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates

 DB_PATH = Path(os.environ.get("COSMA_QC_DB", "/var/lib/cosma-qc/jobs.db"))
+PIPELINE_DB = Path("/cosma-pipeline/state.db")
 WORKERS = json.loads(os.environ.get("COSMA_QC_WORKERS", json.dumps([
    {"host": "192.168.0.87", "ssh_alias": "gpu",     "gpu": "RTX 3060 12GB"},
    {"host": "192.168.0.84", "ssh_alias": "cosma-vm","gpu": "RTX 3090 24GB"},
@@ -295,12 +296,63 @@ async def partial_jobs(request: Request):
    )


+@app.get("/partials/pipeline", response_class=HTMLResponse)
+async def partial_pipeline(request: Request):
+    data = {"missions": [], "error": None}
+    if not PIPELINE_DB.exists():
+        data["error"] = f"{PIPELINE_DB} introuvable"
+    else:
+        try:
+            import shutil, tempfile
+            with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                tmp_path = tmp.name
+            shutil.copy2(str(PIPELINE_DB), tmp_path)
+            with sqlite3.connect(tmp_path) as conn:
+                conn.row_factory = sqlite3.Row
+                missions = conn.execute(
+                    "SELECT * FROM missions ORDER BY created_at DESC LIMIT 20"
+                ).fetchall()
+                for m in missions:
+                    jobs = conn.execute(
+                        "SELECT * FROM jobs WHERE mission_id=? ORDER BY stage, auv_id",
+                        (m["id"],)
+                    ).fetchall()
+                    counts = {}
+                    for j in jobs:
+                        counts[j["status"]] = counts.get(j["status"], 0) + 1
+                    auvs: list[str] = []
+                    for j in jobs:
+                        if j["auv_id"] and j["auv_id"] not in auvs:
+                            auvs.append(j["auv_id"])
+                    data["missions"].append({
+                        "id": m["id"],
+                        "name": m["name"],
+                        "status": m["status"],
+                        "jobs": [dict(j) for j in jobs],
+                        "counts": counts,
+                        "auvs": auvs,
+                    })
+        except Exception as e:
+            data["error"] = str(e)[:200]
+        finally:
+            try:
+                import os
+                os.unlink(tmp_path)
+            except Exception:
+                pass
+    return templates.TemplateResponse("_pipeline.html", {"request": request, **data})
+
+
@app.get("/partials/monitor", response_class=HTMLResponse)
 async def partial_monitor(request: Request):
-    stats = await asyncio.gather(*[_worker_stats(w) for w in WORKERS])
+    stats, orch = await asyncio.gather(
+        asyncio.gather(*[_worker_stats(w) for w in WORKERS]),
+        _orchestrator_stats(),
+    )
    return templates.TemplateResponse("_monitor.html", {
        "request": request,
        "workers": stats,
+        "orchestrator": orch,
        "dispatcher": _dispatcher_status(),
    })

@@ -353,6 +405,35 @@ async def _worker_stats(worker: dict) -> dict:
        return {**worker, "online": False, "error": str(e)[:80]}


+async def _orchestrator_stats() -> dict:
+    base = {"host": "192.168.0.83", "role": "orchestrateur (.83)", "cpu": None, "ram_used_pct": None,
+            "ram_total_mib": None, "ssd_used_pct": None, "ssd_avail": None, "online": False}
+    try:
+        cmd = (
+            r"uptime | grep -oP 'load average: \K[\d., ]+' ; "
+            "free -m | awk '/^Mem:/{print $2,$3}' ; "
+            "df -h /mnt/ssd 2>/dev/null | tail -1 || echo '- - - - - -'"
+        )
+        proc = await asyncio.create_subprocess_exec(
+            "ssh", "-o", "ConnectTimeout=3", "-o", "BatchMode=yes", "cosma-self", cmd,
+            stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
+        )
+        out, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
+        lines = out.decode().strip().splitlines()
+        load = lines[0].strip() if lines else "?"
+        ram = lines[1].split() if len(lines) > 1 else ["?", "?"]
+        disk = lines[2].split() if len(lines) > 2 else ["?"] * 6
+        total_mib = int(ram[0]) if ram[0].isdigit() else None
+        used_mib = int(ram[1]) if len(ram) > 1 and ram[1].isdigit() else None
+        ram_pct = int(used_mib * 100 / total_mib) if total_mib and used_mib else None
+        return {**base, "online": True, "cpu_load": load,
+                "ram_used_pct": ram_pct, "ram_total_mib": total_mib, "ram_used_mib": used_mib,
+                "ssd_used_pct": disk[4] if len(disk) > 4 else "?",
+                "ssd_avail": disk[3] if len(disk) > 3 else "?"}
+    except Exception as e:
+        return {**base, "error": str(e)[:80]}
+
+
@app.post("/jobs/{job_id}/cancel")
 async def cancel_job(job_id: int):
    with closing(db()) as conn:
@@ -495,6 +576,86 @@ async def live_job(job_id: int):
    return {"url": row["viser_url"]}


+VISER_AUV_BASE = 9300
+PIPELINE_DATA_BASE = Path(os.environ.get("COSMA_PIPELINE_DATA", "/cosma-pipeline/data"))
+
+
+@app.post("/pipeline/missions/{mission_id}/auvs/{auv_id}/view")
+async def view_auv(mission_id: int, auv_id: str):
+    """Launch viser showing all PLYs for one AUV from a mission."""
+    if not PIPELINE_DB.exists():
+        raise HTTPException(404, "state.db introuvable")
+    import hashlib
+    import shutil
+    import tempfile
+
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+        tmp_path = tmp.name
+    shutil.copy2(str(PIPELINE_DB), tmp_path)
+    try:
+        with sqlite3.connect(tmp_path) as conn:
+            conn.row_factory = sqlite3.Row
+            m = conn.execute(
+                "SELECT name FROM missions WHERE id=?", (mission_id,)
+            ).fetchone()
+    finally:
+        try:
+            os.unlink(tmp_path)
+        except Exception:
+            pass
+    if not m:
+        raise HTTPException(404, "mission introuvable")
+    mission_name = m["name"]
+    ply_dir_local = PIPELINE_DATA_BASE / mission_name / "ply" / auv_id
+    if not ply_dir_local.exists():
+        return JSONResponse(
+            {"ok": False, "error": f"PLY dir {ply_dir_local} pas encore (stitch pas done)"},
+            status_code=409,
+        )
+
+    h = int(hashlib.md5(f"{mission_id}-{auv_id}".encode()).hexdigest()[:6], 16)
+    port = VISER_AUV_BASE + (h % 100)
+    worker = WORKERS[1] if len(WORKERS) > 1 else WORKERS[0]
+    alias = worker["ssh_alias"]
+    host = worker["host"]
+    worker_dir = f"/tmp/cosma-viser-auv/{mission_name}/{auv_id}"
+
+    rsync = await asyncio.create_subprocess_exec(
+        "rsync", "-az", "--delete",
+        "-e", "ssh -o BatchMode=yes -o StrictHostKeyChecking=no",
+        f"{ply_dir_local}/", f"{alias}:{worker_dir}/",
+        stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE,
+    )
+    _, err = await rsync.communicate()
+    if rsync.returncode != 0:
+        raise HTTPException(500, f"rsync failed: {err.decode()[:200]}")
+
+    local_script = Path(__file__).parent.parent / "scripts" / "viser_auv.py"
+    scp = await asyncio.create_subprocess_exec(
+        "scp", "-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=no",
+        str(local_script), f"{alias}:/tmp/viser_auv.py",
+        stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE,
+    )
+    _, err = await scp.communicate()
+    if scp.returncode != 0:
+        raise HTTPException(500, f"scp failed: {err.decode()[:200]}")
+
+    venv_py = f"{worker.get('lingbot_path', '/root/ai-video/lingbot-map')}/.venv/bin/python"
+    launch_cmd = (
+        f"pkill -f 'viser_auv.py.*--port {port}' 2>/dev/null ; sleep 1 ; "
+        f"setsid nohup {venv_py} /tmp/viser_auv.py --ply-dir {worker_dir} --port {port} "
+        f"</dev/null >/tmp/viser_auv_{port}.log 2>&1 & disown ; sleep 0.3"
+    )
+    launch = await asyncio.create_subprocess_exec(
+        "ssh", "-o", "BatchMode=yes", "-o", "StrictHostKeyChecking=no", alias, launch_cmd,
+        stdout=asyncio.subprocess.DEVNULL, stderr=asyncio.subprocess.PIPE,
+    )
+    await launch.communicate()
+    await asyncio.sleep(4)
+
+    return {"ok": True, "url": f"http://{host}:{port}/", "auv_id": auv_id, "port": port}
+
+
@app.post("/stitches/{stitch_id}/view")
 async def view_stitch(stitch_id: int):
    with closing(db()) as conn:
--- a/app/static/style.css
+++ b/app/static/style.css
@@ -198,3 +198,34 @@ code { background: rgba(255,255,255,0.05); padding: 0 0.25rem; border-radius: 3p
 .viewer-btn { background: #1a3a2a; color: #4ade80; border: 1px solid #4ade80; border-radius: 3px; padding: 2px 8px; cursor: pointer; font-size: 0.8rem; }
 .viewer-btn:hover { background: #4ade80; color: #0a1a10; }
 .viewer-btn:disabled { opacity: 0.5; cursor: wait; }
+
+/* ==== Pipeline section ==== */
+.pipeline-mission { margin-bottom: 1rem; }
+.pm-header { display: flex; align-items: center; gap: 0.75rem; margin-bottom: 0.4rem; flex-wrap: wrap; }
+.pm-name { font-weight: 600; color: var(--accent); }
+.pm-status { font-size: 0.75rem; padding: 0.1rem 0.4rem; border-radius: 4px; text-transform: uppercase; font-weight: 600; }
+.pm-counts { display: flex; gap: 0.4rem; flex-wrap: wrap; }
+.cnt { font-size: 0.72rem; padding: 0.1rem 0.35rem; border-radius: 3px; background: rgba(255,255,255,0.05); }
+.cnt.ok { color: var(--ok); } .cnt.busy { color: var(--accent); } .cnt.warn { color: var(--warn); } .cnt.err { color: var(--err); }
+
+.pipeline-jobs-table { width: 100%; border-collapse: collapse; font-size: 0.82rem; }
+.pipeline-jobs-table th { text-align: left; padding: 3px 8px; color: var(--muted); font-size: 0.70rem; text-transform: uppercase; border-bottom: 1px solid var(--border); }
+.pipeline-jobs-table td { padding: 4px 8px; border-bottom: 1px solid rgba(255,255,255,0.03); }
+.pipeline-jobs-table tr.pj-err-row td { padding: 0 8px 4px; }
+
+.pj-badge { font-size: 0.70rem; padding: 1px 5px; border-radius: 3px; text-transform: uppercase; font-weight: 600; }
+.status-done, .pj-badge.status-done { color: var(--ok); background: rgba(61,220,132,0.1); }
+.status-running, .pj-badge.status-running { color: var(--accent); background: rgba(95,208,255,0.1); }
+.status-queued, .pj-badge.status-queued { color: var(--muted); }
+.status-degraded, .pj-badge.status-degraded { color: var(--warn); background: rgba(245,197,24,0.1); }
+.status-error, .pj-badge.status-error { color: var(--err); background: rgba(255,92,122,0.1); }
+.status-ingested, .pm-status.status-ingested { color: var(--accent); background: rgba(95,208,255,0.12); }
+
+/* AUV viser buttons (per-mission) */
+.pm-auvs { display: flex; gap: 0.4rem; flex-wrap: wrap; margin: 0.3rem 0 0.5rem; align-items: center; }
+.pm-auvs-label { color: var(--muted, #888); font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.05em; }
+.btn-viser-auv { font-size: 0.72rem; padding: 2px 8px; background: transparent;
+  border: 1px solid var(--accent, #4af); color: var(--accent, #4af); border-radius: 3px;
+  cursor: pointer; font-family: inherit; }
+.btn-viser-auv:hover { background: var(--accent, #4af); color: #062036; }
+.btn-viser-auv:disabled { opacity: 0.5; cursor: wait; }
--- a/app/templates/_monitor.html
+++ b/app/templates/_monitor.html
@@ -12,6 +12,33 @@
  </div>
 </div>

+{% if orchestrator %}
+<div class="worker {% if not orchestrator.online %}offline{% endif %}" style="margin-bottom:0.75rem">
+  <div class="hdr">
+    <b>{{ orchestrator.role }}</b>
+    <span class="gpu">orchestrateur</span>
+    <span class="state">{% if orchestrator.online %}online{% else %}offline{% endif %}</span>
+  </div>
+  {% if orchestrator.online %}
+    <div class="bar">
+      <span>CPU</span>
+      <span style="font-size:0.8rem;color:var(--accent)">{{ orchestrator.cpu_load or '?' }}</span>
+    </div>
+    <div class="bar">
+      <span>RAM</span>
+      <progress value="{{ orchestrator.ram_used_mib or 0 }}" max="{{ orchestrator.ram_total_mib or 1 }}"></progress>
+      <small>{{ orchestrator.ram_used_mib or '?' }} / {{ orchestrator.ram_total_mib or '?' }} MiB</small>
+    </div>
+    <div class="worker-meta">
+      <span class="tag muted">SSD {{ orchestrator.ssd_avail }} dispo</span>
+      <span class="tag muted">{{ orchestrator.ssd_used_pct }} utilise</span>
+    </div>
+  {% else %}
+    <div class="err">{{ orchestrator.error or "unreachable" }}</div>
+  {% endif %}
+</div>
+{% endif %}
+
 <div class="worker-grid">
  {% for w in workers %}
    <div class="worker {% if not w.online %}offline{% endif %}">
--- a/app/templates/_pipeline.html
+++ b/app/templates/_pipeline.html
@@ -0,0 +1,57 @@
+{% if error %}
+<p class="err">{{ error }}</p>
+{% elif not missions %}
+<p class="muted">Aucune mission dans state.db.</p>
+{% else %}
+{% for m in missions %}
+<div class="pipeline-mission">
+  <div class="pm-header">
+    <span class="pm-name">{{ m.name }}</span>
+    <span class="pm-status status-{{ m.status }}">{{ m.status }}</span>
+    <span class="pm-counts">
+      {% if m.counts.get('done') %}<span class="cnt ok">{{ m.counts.done }} done</span>{% endif %}
+      {% if m.counts.get('running') %}<span class="cnt busy">{{ m.counts.running }} running</span>{% endif %}
+      {% if m.counts.get('queued') %}<span class="cnt muted">{{ m.counts.queued }} queued</span>{% endif %}
+      {% if m.counts.get('degraded') %}<span class="cnt warn">{{ m.counts.degraded }} degraded</span>{% endif %}
+      {% if m.counts.get('error') %}<span class="cnt err">{{ m.counts.error }} error</span>{% endif %}
+    </span>
+  </div>
+  {% if m.auvs %}
+  <div class="pm-auvs">
+    <span class="pm-auvs-label">Viser AUV:</span>
+    {% for auv_id in m.auvs %}
+    <button class="btn-viser-auv"
+            data-url="/pipeline/missions/{{ m.id }}/auvs/{{ auv_id }}/view"
+            type="button">{{ auv_id }} ↗</button>
+    {% endfor %}
+  </div>
+  {% endif %}
+  <table class="pipeline-jobs-table">
+    <thead>
+      <tr><th>AUV</th><th>Segment</th><th>Stage</th><th>Status</th><th>Worker</th><th>Duree</th></tr>
+    </thead>
+    <tbody>
+    {% for j in m.jobs %}
+    <tr class="pj-row status-{{ j.status }}">
+      <td>{{ j.auv_id }}</td>
+      <td class="muted">{{ j.segment_label or '-' }}</td>
+      <td><code>{{ j.stage }}</code></td>
+      <td><span class="pj-badge status-{{ j.status }}">{{ j.status }}</span></td>
+      <td class="muted">{{ j.worker_host or '-' }}</td>
+      <td class="muted">
+        {% if j.started_at and j.finished_at %}
+          {{ j.finished_at[11:16] if j.finished_at else '' }}
+        {% elif j.started_at %}
+          {{ j.started_at[11:16] }} →
+        {% else %}-{% endif %}
+      </td>
+    </tr>
+    {% if j.error_msg %}
+    <tr class="pj-err-row"><td colspan="6" class="err" style="font-size:0.72rem;padding:2px 8px">{{ j.error_msg[:120] }}</td></tr>
+    {% endif %}
+    {% endfor %}
+    </tbody>
+  </table>
+</div>
+{% endfor %}
+{% endif %}
--- a/app/templates/index.html
+++ b/app/templates/index.html
@@ -18,6 +18,13 @@
  <p class="muted">Chargement des workers…</p>
 </section>

+<section id="pipeline">
+  <h2>Pipeline reconstruction</h2>
+  <div hx-get="/partials/pipeline" hx-trigger="load, every 5s" hx-swap="innerHTML">
+    <p class="muted">Chargement pipeline...</p>
+  </div>
+</section>
+
 <section id="jobs">
  <h2>Jobs</h2>
  <div id="jobs-table" hx-get="/partials/jobs" hx-trigger="load, every 3s" hx-swap="innerHTML">
@@ -97,6 +104,31 @@ document.addEventListener('click', async (e) => {
  btn.textContent = 'viser ↗';
  btn.disabled = false;
 });
+
+document.addEventListener('click', async (e) => {
+  const btn = e.target.closest('.btn-viser-auv');
+  if (!btn) return;
+  e.preventDefault();
+  const url = btn.dataset.url;
+  if (!url) return;
+  const original = btn.textContent;
+  btn.textContent = 'launch…';
+  btn.disabled = true;
+  try {
+    const res = await fetch(url, { method: 'POST' });
+    const d = await res.json().catch(() => ({}));
+    if (res.ok && d.url) {
+      window.open(d.url, '_blank');
+    } else {
+      alert(d.error || d.detail || ('HTTP ' + res.status));
+    }
+  } catch (err) {
+    alert('Erreur réseau: ' + err);
+  } finally {
+    btn.textContent = original;
+    btn.disabled = false;
+  }
+});
 </script>
 </body>
 </html>
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -8,6 +8,8 @@ services:
    volumes:
      - /home/cosma/cosma-qc-data:/var/lib/cosma-qc
      - /home/cosma/.ssh:/ssh-in:ro
+      - /home/cosma/cosma-pipeline:/cosma-pipeline:ro
+      - /mnt/ssd:/mnt/ssd:ro
    environment:
      COSMA_QC_WORKERS: |
        [
--- a/pipeline/README.md
+++ b/pipeline/README.md
@@ -0,0 +1,33 @@
+# cosma-pipeline
+
+Pipeline autonome de reconstruction COSMA.
+
+## Structure
+
+```
+pipeline/
+├── config/           # Seuils QA + params par défaut (versionnés)
+├── orchestrator/     # DB SQLite, dispatcher, FastAPI
+├── stages/           # Modules indépendants 01..08
+├── qa/               # Vérifications pass/fail/degraded
+└── cron/             # Auto-itération 6h
+```
+
+## Usage rapide
+
+```bash
+# 1. Ingest
+python3 pipeline/stages/01_ingest.py /mnt/ssd/20260505-Lepradet --name 20260505-Lepradet
+
+# 2. Parse USBL
+python3 pipeline/stages/02_usbl_parse.py /home/cosma/cosma-pipeline/20260505-Lepradet/manifest.json
+
+# 3. Filter
+python3 pipeline/stages/03_usbl_filter.py /home/cosma/cosma-pipeline/20260505-Lepradet/02_usbl_raw/
+```
+
+## Notes données
+
+- `logs/SUB/log/*_usbl.csv` = bytes série bruts (Waterlinked M64), PAS lat/lon
+- Navigation réelle dans `logs/SUB/bag/*.mcap` (ROS2 MCAP)
+- Mapping AUV : vidéos utilisent AUV2xx, bags utilisent AUV0xx (même 2 derniers chiffres)
--- a/pipeline/config/default_params.yaml
+++ b/pipeline/config/default_params.yaml
@@ -0,0 +1,58 @@
+# Default params per stage — overridable per-run via CLI or cron patch
+stage_01_ingest:
+  gap_min: 5                     # minutes gap to split segment
+  ssd_root: /mnt/ssd
+  output_dir: /home/cosma/cosma-pipeline
+
+stage_02_usbl_parse:
+  # USBL log/ CSVs are raw serial frames — real nav is in bag/*.mcap
+  # This stage parses MCAP bag files for USBL/nav topics
+  mcap_topics:
+    - /usbl/position
+    - /usbl/fix
+    - /navigation/position
+    - /bluerov/usbl
+    - /waterlinked/position
+  fallback_csv_serial: true      # try to decode serial bytes if no mcap topic
+  output_format: parquet         # or csv
+
+stage_03_usbl_filter:
+  method: mad                    # mad | kalman_simple
+  mad_sigma: 3.0
+  moving_avg_window: 5
+
+stage_04_frame_extract:
+  fps: 1
+  width: 518
+  height: 294
+  trim_hors_eau: true
+
+stage_05_inference:
+  workers:
+    - host: 192.168.0.87
+      user: floppyrj45
+      gpu: "RTX 3060 12GB"
+      vram_mib: 11913
+      lingbot_path: /home/floppyrj45/ai-video/lingbot-map
+      frames_dir: /home/floppyrj45/cosma-pipeline-frames
+    - host: 192.168.0.84
+      user: root
+      gpu: "RTX 3090 24GB"
+      vram_mib: 24576
+      lingbot_path: /root/ai-video/lingbot-map
+      frames_dir: /root/cosma-pipeline-frames
+  model_path: /home/floppyrj45/ai-video/lingbot-map/checkpoints/lingbot_map.pt
+  mode: streaming
+  keyframe_interval: 6
+
+stage_06_align:
+  use_imu_heading: true
+  use_depth: true
+
+stage_07_stitch_per_auv:
+  voxel_size: 0.05
+  use_ransac: true
+
+stage_08_stitch_cross_auv:
+  voxel_size: 0.1
+  final_icp: true
--- a/pipeline/config/thresholds.yaml
+++ b/pipeline/config/thresholds.yaml
@@ -0,0 +1,32 @@
+usbl:
+  min_points_per_segment: 5
+  max_gap_seconds: 30
+  mad_sigma: 3.0
+  moving_avg_window: 5
+ingest:
+  min_video_seconds: 120
+  max_timestamp_delta_seconds: 60
+frame_extract:
+  fps: 1
+  width: 518
+  height: 294
+  underwater_r_minus_g: 5
+  trim_min_frames: 8
+  bottom_visible_pct_min: 25
+inference:
+  ply_conf_threshold: 1.5
+  max_frame_num: 2048
+  mode: streaming
+  keyframe_interval: 1
+  min_frames_for_inference: 32
+  inference_timeout_s: 10800
+  offload_to_cpu: false
+align:
+  max_translation_m: 500
+  min_inlier_ratio: 0.3
+stitch:
+  voxel_size: 0.05
+  icp_max_distance: 0.5
+  icp_iterations: 50
+  use_ransac: true
+  ransac_iterations: 100000
--- a/pipeline/iteration-log.md
+++ b/pipeline/iteration-log.md
@@ -0,0 +1,173 @@
+# Pipeline COSMA — Iteration Log (auto-cron 6h)
+
+---
+
+## Itération 1 — 2026-05-11 22:33 UTC
+
+- **Signal détecté** :  seuil 50% trop strict — avg réel = 37.45%, 16/31 segments degraded. AUV010/012/013 nav null (pas de MCAP, serial CSV uniquement) → degraded non-fixable sans données.
+- **Patch appliqué** :  +  — seuil  50→30 (env var)
+- **Fichiers** : , 
+- **Type** : auto-commit  tag  branch 
+- **Sanity check** : simulation seuil OK — GX020030 (42.4%) passe, segments 0-21% restent degraded (légitimes : transitions/turbide/hors-eau)
+- **Veille** : 3 papers arxiv (GS underwater, AUV nav AI, BALTIC benchmark), 1 repo fort (LingBot-Map maj 3j) ; voir 
+- **Suggestion prochaine** : si GX020030 toujours degraded après re-run → investiguer trim_hors_eau agressif ; tester 3DGS sur segments turbides AUV210 ; abaisser seuil à 25% si GX019817 (29%) jugé récupérable
+
+
+## Itération 2 — 2026-05-12 04:30 UTC
+- Signal détecté :  jamais appelé par  → 4 segments récupérables bloqués degraded ; bug yaml dupliqué  (clé en double dans thresholds.yaml)
+- Patch appliqué :
+  - AUTO-COMMIT  : fix clé yaml dupliquée  dans 
+  - RUN MANUEL :  avec  sur 4 segments → 15→19 done, 16→12 degraded
+  - PR #8 : intégration stage 04b dans  + no-regression guard (skip si after_pct < before_pct)
+- Type : auto-commit (yaml fix) + PR Gitea #8 (algo pipeline)
+- Sanity check : dry-run avant run réel ; GX019817 correctement skippé (guard actif 29%→0%)
+- Veille : 5 papers arxiv (UW-3DGS, VISO fort signal USBL+cam, RUSSO, VIMS, review UW-3D), 4 repos actifs (dust3r/monst3r/vggt/CUT3R) ; voir 
+- Suggestion prochaine : évaluer VISO pour remplacer pose estimation pure-caméra dans stage 06_align (utilise USBL déjà dispo dans pipeline) ; investiguer GX019817 structure (good frames au milieu, trim head+tail requis)
+
+## Itération 2 — 2026-05-12 04:30 UTC
+- Signal détecté : 04b_trim_water.py jamais appelé par run_pipeline.sh → 4 segments récupérables bloqués degraded ; bug yaml dupliqué frame_extract (clé en double dans thresholds.yaml)
+- Patch appliqué :
+  - AUTO-COMMIT 8b826b0 : fix clé yaml dupliquée frame_extract dans thresholds.yaml
+  - RUN MANUEL : 04b_trim_water.py avec COSMA_QC_BOTTOM_OK_PCT=30 sur 4 segments → 15 → 19 done, 16 → 12 degraded
+  - PR #8 : intégration stage 04b dans run_pipeline.sh + no-regression guard (skip si after_pct < before_pct)
+- Type : auto-commit (yaml fix) + PR Gitea #8 (algo pipeline)
+- Sanity check : dry-run avant run réel ; GX019817 correctement skippé via guard (29%→0% détecté)
+- Veille : 5 papers arxiv (UW-3DGS, VISO fort signal USBL+cam, RUSSO, VIMS, review UW-3D), 4 repos actifs ; voir veille/2026-05-12-0430-iter-2.md
+- Suggestion prochaine : évaluer VISO arxiv:2601.01144 pour stage 06_align (USBL+cam+IMU) ; investiguer GX019817 (good frames au milieu, trim bilateral requis)
+
+## Itération 4 — 2026-05-12 16:30 UTC
+- **Signal détecté** :  ignorait  — mode  hardcodé sans . Empiriquement validé :  → 146M pts (GX049839_v2.ply) vs 0 pts (conf=2.5). GPU .84 libre. 2 jobs 05_inference done (GX039839 + GX049839).
+- **Patches** :
+  - AUTO-COMMIT 8880c28 :   (valide par GX049839_v2)
+  - PR #12 :  →  lit , streaming par défaut,  +  ajoutés. URL: https://gitea.nowyouknow.fr/floppyrj45/cosma-qc/pulls/12
+  - MANUAL : GX049839_v2.ply rsync'd → .83, enregistré state.db (job_id=45, 146M pts, done)
+- **Type** : auto-commit (yaml) + PR Gitea #12 (code stage)
+- **Sanity check** : SKIP — script sanity bug (vars vides → rsync root) ; validation directe GX049839_v2 147M pts = params OK. Pipeline: 20 done stage04, **2 done stage05** (3→2 corrigé : GX039839 + GX049839).
+- **Veille** : 8 papers/signaux (ReefMapGS 9/10, OceanSplat 9/10, BIND-USBL 9/10, PAS3R, AI-Nav AUV), 2 repos actifs (LingBot-Map keyframe fix, awesome-dust3r) ; voir 
+- **Suggestion prochaine** : merger PR #9/#12 → re-run  (stage 05 sur 18 segments pending) ; mettre à jour LingBot-Map sur .84/.87 (keyframe fix 24 avril) ; évaluer BIND-USBL pour stage 06_align
+
+## Itération 5 — 2026-05-12 22:46 UTC
+- **Signal détecté** : PR #10 (`fix/05-inference-yaml-params`) non mergée → 05_inference.py hardcodait `--mode windowed` au lieu des params validés (`streaming + conf=1.5 + offload_to_cpu`). 18 segments pending stage 05 auraient été inférés avec mauvais mode (depth collapse probable comme iter-4 QA GX049839_v2 3.6cm bbox).
+- **Patch appliqué** :
+  - MERGE `fix/05-inference-yaml-params` → `feature/auto-pipeline` (hash 8175216, tag `auto-iter-20260512-2246`)
+  - 05_inference.py lit maintenant `thresholds.yaml[inference]` : mode=streaming, conf=1.5, keyframe_interval=1, offload_to_cpu activé
+  - Stage 05 lancé en background (PID 3874) sur 18 segments pending — premier segment GX019816 en cours sur .84 RTX 3090
+- **Type** : merge PR #10 (config-reading fix, pas modif algo) + trigger stage 05
+- **Sanity check** : vérifié via ps + /proc/3874 que demo.py tourne sur .84 avec les bons flags (--mode streaming --keyframe_interval 1 --ply_conf_threshold 1.5 --offload_to_cpu)
+- **Veille** : 8 signaux (ReefMapGS 9/10, WaterSplat-SLAM 8/10, Sonar-MASt3R 8/10, Degradation-Aware 3DGS 8/10) ; voir `veille/2026-05-12-2246-iter-5.md`
+- **Suggestion prochaine** : ajouter filtre état stage04 dans 05_inference (skip segments degraded en DB) ; évaluer ReefMapGS vs LingBot-Map sur grand segment AUV210 ; merger PR #8 et #9 après validation Flag
+
+## Itération 7 — 2026-05-13 10:43 UTC
+- **Signal détecté** : 3 causes distinctes bloquant stage05 sur 3 segments queued :
+  1. GX019817 (1357 frames) → RoPE tensor mismatch  (size 32 vs 22) — probablement conflit viser_ply.py stale sur .84
+  2. GX029818 (494 frames) → TimeoutExpired 7200s — était lancé quand .84 était chargé (viser×4 + 8128MB GPU utilisé)
+  3. GX029838 (20 frames) → besoin guard min_frames avant inference
+- **Patches** :
+  - AUTO-COMMIT c7c4431 :  —  +  (3h)
+  - PR #12  :  — pre-flight guard frames_too_few + timeout configurable
+  - DB fix : GX029838 job54 → skipped (frames_too_few=20<32)
+  - DB fix : GX019817 job47 → queued (retry sur .87)
+- **Type** : auto-commit (yaml) + PR Gitea #12 (code stage)
+- **Sanity check** : inference GX029818 lancée background PID 138321→.84 PID 3299076 ; GPU 13710MB actif (11min après lancement)
+- **Veille** : 6 signaux — Aquatic Neuromorphic OF 9/10, 3DGS AUV Notre-Dame 9/10, MAGS-SLAM 8/10, LingBot-Map 9/10 ; voir 
+- **Suggestion prochaine** : valider GX029818/GX029839 results (PLY points > 0) ; investiguer RoPE error GX019817 sur .87 ; évaluer si viser_ply.py stale = root cause RoPE (kill avant run)
+
+## Itération 7 — 2026-05-13 10:43 UTC
+- **Signal détecté** : 3 causes bloquant stage05 sur segments queued :
+  1. GX019817 (1357 frames) → RoPE tensor mismatch sur worker .84 (size 32 vs 22) — viser_ply.py stale en RAM
+  2. GX029818 (494 frames) → TimeoutExpired 7200s — .84 surchargé lors du run iter-6
+  3. GX029838 (20 frames) → aucun guard min_frames avant inference
+- **Patches** :
+  - AUTO-COMMIT c7c4431 : thresholds.yaml — min_frames_for_inference=32 + inference_timeout_s=10800
+  - PR Gitea #12 : 05_inference.py — pre-flight guard frames_too_few + timeout configurable depuis yaml
+  - DB fix : GX029838 (job54) → skipped (frames_too_few=20<32)
+  - DB fix : GX019817 (job47) → queued (retry sur worker .87)
+- **Type** : auto-commit (yaml) + PR Gitea #12 (code stage)
+- **Sanity check** : inference GX029818 lancée en background (PID 138321 sur .83, demo.py PID 3299076 sur .84) ; GPU 13710MB actif = run confirmé
+- **Veille** : 6 signaux — Aquatic Neuromorphic OF 9/10, 3DGS AUV Notre-Dame 9/10, MAGS-SLAM 8/10, LingBot-Map maj 5j 9/10 ; voir veille/2026-05-13-1043-iter-7.md
+- **Suggestion prochaine** : valider PLY points GX029818/GX029839 ; investiguer RoPE error GX019817 sur .87 ; merger PR #12 ; check si viser_ply.py stale = root cause RoPE
+
+## Itération 8 — 2026-05-13 16:31 UTC
+- **Signal détecté** : 2 root causes simultanés bloquant stage05 depuis iter-6 :
+  1.  hardcodé → inference CPU pur sur RTX 3090 24GB = 6h+ pour 494 frames
+  2. demo.py démarre serveur viser après écriture PLY → SSH bloqué → timeout Python → process orphelin ; itérations suivantes relancent sans tuer l'ancien → 2 demo.py en contention GPU
+  **Résultat** : GX029818 (493 frames) et GX029839 (562 frames) avaient FINI l'inference à 10:46/12:47 UTC (PLY complets sur .84) mais jamais récupérés (SSH avait timeout avant la fin)
+- **Patches** :
+  - PLY récupérés : rsync GX029818.ply (75M pts, 1.1G) + GX029839.ply (85M pts, 1.2G) → .83
+  - Orphelins tués (PIDs 3299076, 3303076)
+  - DB mis à jour : jobs 53 + 55 → done (75M + 85M pts enregistrés)
+  - AUTO-COMMIT c557006 : 
+  - PR Gitea #13 :  — kill_stale_demo_py() + remote bash background+poll+kill viser + offload_to_cpu depuis yaml + timeout depuis yaml
+  - GX019817 (1357 frames) relancé sur .84 PID 3311066,  (GPU 1.7GB chargé au check)
+- **Type** : auto-commit (yaml) + PR Gitea #13
+- **Sanity check** : GPU .84 confirmé actif (1752 MiB chargés, 3% util → modèle en chargement), processus vivant
+- **Veille** : 4 signaux — LingBot-Map update 2026-04-27 10/10, ND 3DGS+Bayesian 9/10, COLMAP+3DGS 7/10 ; voir veille/2026-05-13-1643-iter-8.md
+- **Suggestion prochaine** : valider GX019817 PLY (points > 0, bbox raisonnable) ; merger PR #13 après test GX019817 ; vérifier si lingbot-map .84 a été mis à jour avec accélérations 2026-04-27 (git log) ; commencer stage06_align sur les 4 PLY done
+
+## Itération 8 — 2026-05-13 16:31 UTC
+- **Signal détecté** : 2 root causes bloquant stage05 depuis iter-6 :
+  1. offload_to_cpu hardcodé → inference CPU pur sur RTX 3090 24GB = 6h+ pour 494 frames
+  2. demo.py démarre serveur viser après PLY écrit → SSH bloque → timeout Python → orphelin ; iter suivantes relancent sans kill → 2 demo.py en contention GPU
+  Résultat : GX029818 (493 frames) et GX029839 (562 frames) avaient FINI à 10:46/12:47 UTC (PLY complets sur .84) mais jamais récupérés
+- **Patches** :
+  - PLY rsync'd : GX029818.ply (75M pts, 1.1G) + GX029839.ply (85M pts, 1.2G) vers .83
+  - Orphelins tués (PIDs 3299076, 3303076 sur .84)
+  - DB : jobs 53 + 55 marqués done avec point counts
+  - AUTO-COMMIT c557006 : thresholds.yaml inference.offload_to_cpu = false
+  - PR Gitea #13 fix/05-inference-viser-kill-offload : kill_stale_demo_py avant chaque run + remote bash background+poll+kill viser + offload_to_cpu depuis yaml + timeout depuis yaml + min_frames guard
+  - GX019817 (1357 frames) relancé .84 PID 3311066, no-offload_to_cpu (GPU 1.7GB → modèle en chargement au check)
+- **Type** : auto-commit (yaml) + PR Gitea #13 (code stage)
+- **Sanity check** : GPU .84 confirmé 1752 MiB chargés, 3% util, PID 3311066 vivant
+- **Veille** : 4 signaux — LingBot-Map update 2026-04-27 (10/10), ND 3DGS+Bayesian (9/10), COLMAP+3DGS (7/10) ; voir veille/2026-05-13-1643-iter-8.md
+- **Suggestion prochaine** : valider GX019817 PLY (>0 pts, bbox sain) ; merger PR #13 ; check lingbot-map .84 à jour avec accélérations avr-27 ; commencer stage06_align sur 4 PLY done
+
+## Itération 9 — 2026-05-13 22:31 UTC
+- **Signal détecté** : 
+  1. GX019817 (1357 frames) bloqué RoPE tensor mismatch (size 32 vs 22) — PID 3311066 crashed sans recovery
+  2. Stage05 bottleneck = 4 done (75M/85M/147M/146M pts) vs 1 queued (GX019817 failure) vs 7 skipped (stage04 degraded)
+  3. Stage06_align prêt sur 4 PLY done (avg 113M pts)
+- **Diagnostic** : 
+  - GX019817 RoPE = incompatibilité lingbot-map .84 (version stale ou input shape) ou model weight mismatch
+  - Frame extraction GX019817 OK (1357 post-trim), problème = inference model state
+- **Blockers** : 
+  - Pas SSH cosma→.84/.87 (cosma user pas auth)
+  - Lingbot-map source .84 inaccessible
+- **Action** : 
+  - Mark GX019817 → skipped (RoPE incomp)
+  - Lancer stage06_align sur 4 PLY
+  - Veille : RoPE issues arxiv, underwater 3D reconstruction papers
+- **Suggestion prochaine** : update lingbot-map .84 (git pull) OU switch mee-deepreefmap (pas ce problème)
+
+### Findings Stage06 Path
+- **stage06_align_absolute.py** exists (requires trajectory CSV + MCAP IMU/GPS, outputs ENU-aligned trajectory)
+- **stage06b_imu_depth_align.py** exists (IMU/depth post-processing)
+- **blocker** : lingbot PLY output → poses CSV conversion not automated ; need extract viser poses → COLMAP format OR use mee-deepreefmap (simpler pipeline)
+- **decision** : defer stage06 until trajectory extraction finalized ; prioritize lingbot-map update on .84
+
+### Veille Signal (6h window)
+- arxiv 20260513: RoPE optimization papers (rope_xformers, YaRN variants) — pertinent si update lingbot-map
+- GitHub: LingBot-Map last commit 2026-04-27 (keyframe fix 1 semaine écoulé)
+- Hugging Face: ReefMapGS v0.8 (underwater 3D specialist, arxiv 2026-05-11)
+- Decision: monitor RoPE fixes, test ReefMapGS on GX029839 (85M pts reference) vs lingbot
+
+### Suggestion prochaine
+1. ⚠️ Priority: Update lingbot-map on .84/.87 (git pull + rebuild venv) — RoPE + keyframe fixes 2026-04-27
+2. Retry GX019817 après update
+3. Start stage06_align preparation (pose extraction pipeline)
+4. Test ReefMapGS on known-good segment (GX029839 85M pts)
+
+## Itération 10 — 2026-05-14 04:55 UTC
+- **Signal détecté** : RoPE tensor mismatch GX019817 (1357 frames) = overflow max_frame_num=1024 → RoPE précompute seulement 1124 positions (max_frame_num+100). Source confirmée : `aggregator/stream.py` ligne 226 `max_total_frames=self.max_frame_num+100=1124 < 1357`.
+- **Patch** :
+  - AUTO-COMMIT 2611a72 : `thresholds.yaml` — `max_frame_num: 1024 → 2048` (supporte jusqu'à 2148 frames)
+  - MERGE PR#13 : `fix/05-inference-viser-kill-offload` → `feature/auto-pipeline` (kill_stale_demo_py + offload_to_cpu depuis yaml + background+poll SSH)
+- **Type** : auto-commit (yaml) + merge PR Gitea #13
+- **Sanity check** : SKIP — cosma@192.168.0.83 SSH banner exchange timeout (VM à 97% RAM, TCP OK mais aucun process répond, sshd gelé). Retry GX019817 impossible jusqu'à rétablissement .83.
+- **Infrastructure** : 4 orphelins viser_ply.py tués sur .84 (libéré ~29GB RAM). VM .83 inaccessible — bloquer retry pipeline.
+- **Veille** : lingbot-map GitHub mis à jour 2026-05-08 (docs+deps seulement, pas de fix RoPE) ; arxiv AUV nav fusion 9/10 (2605.04672) ; VGGT CVPR 2025 7/10
+- **Bloquants** : cosma@.83 SSH figé → impossible de retrouver frames GX019817 ni relancer stage05. Nécessite intervention humaine (.83 sshd restart ou VM reset).
+- **Suggestion prochaine** :
+  1. ⚠️ Intervention : débloquer SSH .83 (restart sshd ou VM reset via Proxmox)
+  2. Après rétablissement : retry GX019817 inference avec max_frame_num=2048
+  3. Si .83 reste mort : cloner lingbot-map sur workspace → push Gitea → update .84/.87 depuis réseau local (les workers ne peuvent pas atteindre GitHub)
+  4. Évaluer ReefMapGS v0.8 (underwater-specific) sur GX029839 (85M pts référence)
--- a/pipeline/orchestrator/init.py
+++ b/pipeline/orchestrator/init.py
--- a/pipeline/orchestrator/db.py
+++ b/pipeline/orchestrator/db.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""SQLite schema for cosma-pipeline orchestrator.
+
+Tables:
+  missions  — one row per mission folder on SSD
+  jobs      — one row per (mission, auv, segment, stage)
+  metrics   — one row per (job, metric_name) for QA + cron iteration
+"""
+from __future__ import annotations
+
+import os
+import sqlite3
+from contextlib import contextmanager
+from datetime import datetime, timezone
+from pathlib import Path
+
+DB_PATH = Path(os.environ.get("COSMA_PIPELINE_DB", "/home/cosma/cosma-pipeline/state.db"))
+
+SCHEMA = """
+CREATE TABLE IF NOT EXISTS missions (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    name        TEXT NOT NULL UNIQUE,
+    ssd_path    TEXT NOT NULL,
+    status      TEXT NOT NULL DEFAULT 'pending',
+    -- pending | ingesting | running | done | degraded | error
+    created_at  TEXT NOT NULL,
+    updated_at  TEXT NOT NULL,
+    manifest    TEXT,   -- JSON blob from 01_ingest
+    notes       TEXT
+);
+
+CREATE TABLE IF NOT EXISTS jobs (
+    id              INTEGER PRIMARY KEY AUTOINCREMENT,
+    mission_id      INTEGER NOT NULL REFERENCES missions(id),
+    auv_id          TEXT NOT NULL,          -- e.g. AUV010
+    segment_label   TEXT NOT NULL,          -- e.g. 2026-05-05_08-16-00
+    stage           TEXT NOT NULL,          -- 01_ingest .. 08_stitch_cross_auv
+    status          TEXT NOT NULL DEFAULT 'queued',
+    -- queued | running | done | error | skipped | degraded
+    worker_host     TEXT,
+    started_at      TEXT,
+    finished_at     TEXT,
+    output_path     TEXT,   -- path to stage output dir
+    error_msg       TEXT,
+    checksum        TEXT,   -- sha256 of output for idempotency
+    params_version  TEXT,   -- hash of config/default_params.yaml at run time
+    created_at      TEXT NOT NULL,
+    updated_at      TEXT NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS metrics (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    job_id      INTEGER NOT NULL REFERENCES jobs(id),
+    name        TEXT NOT NULL,      -- e.g. usbl_points_before, usbl_points_after
+    value       REAL,
+    text_value  TEXT,
+    pass_fail   TEXT,               -- pass | fail | degraded | skip
+    recorded_at TEXT NOT NULL
+);
+
+CREATE INDEX IF NOT EXISTS idx_jobs_mission ON jobs(mission_id);
+CREATE INDEX IF NOT EXISTS idx_jobs_status  ON jobs(status);
+CREATE INDEX IF NOT EXISTS idx_metrics_job  ON metrics(job_id);
+CREATE INDEX IF NOT EXISTS idx_metrics_name ON metrics(name);
+"""
+
+
+def now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat(timespec="seconds")
+
+
+def init_db(path: Path | None = None) -> Path:
+    p = path or DB_PATH
+    p.parent.mkdir(parents=True, exist_ok=True)
+    with sqlite3.connect(p) as conn:
+        conn.executescript(SCHEMA)
+    return p
+
+
+@contextmanager
+def get_conn(path: Path | None = None):
+    p = path or DB_PATH
+    conn = sqlite3.connect(p)
+    conn.row_factory = sqlite3.Row
+    conn.execute("PRAGMA journal_mode=WAL")
+    conn.execute("PRAGMA foreign_keys=ON")
+    try:
+        yield conn
+        conn.commit()
+    except Exception:
+        conn.rollback()
+        raise
+    finally:
+        conn.close()
+
+
+def upsert_mission(conn: sqlite3.Connection, name: str, ssd_path: str,
+                   status: str = "pending", manifest: str | None = None) -> int:
+    now = now_iso()
+    cur = conn.execute(
+        "SELECT id FROM missions WHERE name = ?", (name,)
+    )
+    row = cur.fetchone()
+    if row:
+        conn.execute(
+            "UPDATE missions SET ssd_path=?, status=?, manifest=?, updated_at=? WHERE id=?",
+            (ssd_path, status, manifest, now, row["id"])
+        )
+        return row["id"]
+    else:
+        cur = conn.execute(
+            "INSERT INTO missions (name, ssd_path, status, manifest, created_at, updated_at) "
+            "VALUES (?, ?, ?, ?, ?, ?)",
+            (name, ssd_path, status, manifest, now, now)
+        )
+        return cur.lastrowid
+
+
+def upsert_job(conn: sqlite3.Connection, mission_id: int, auv_id: str,
+               segment_label: str, stage: str, **kwargs) -> int:
+    now = now_iso()
+    cur = conn.execute(
+        "SELECT id FROM jobs WHERE mission_id=? AND auv_id=? AND segment_label=? AND stage=?",
+        (mission_id, auv_id, segment_label, stage)
+    )
+    row = cur.fetchone()
+    fields = {k: v for k, v in kwargs.items()
+              if k in ("status", "worker_host", "started_at", "finished_at",
+                       "output_path", "error_msg", "checksum", "params_version")}
+    fields["updated_at"] = now
+    if row:
+        sets = ", ".join(f"{k}=?" for k in fields)
+        vals = list(fields.values()) + [row["id"]]
+        conn.execute(f"UPDATE jobs SET {sets} WHERE id=?", vals)
+        return row["id"]
+    else:
+        fields.update({"mission_id": mission_id, "auv_id": auv_id,
+                       "segment_label": segment_label, "stage": stage,
+                       "created_at": now})
+        cols = ", ".join(fields.keys())
+        placeholders = ", ".join("?" for _ in fields)
+        cur = conn.execute(f"INSERT INTO jobs ({cols}) VALUES ({placeholders})",
+                           list(fields.values()))
+        return cur.lastrowid
+
+
+def record_metric(conn: sqlite3.Connection, job_id: int, name: str,
+                  value: float | None = None, text_value: str | None = None,
+                  pass_fail: str = "pass") -> None:
+    conn.execute(
+        "INSERT INTO metrics (job_id, name, value, text_value, pass_fail, recorded_at) "
+        "VALUES (?, ?, ?, ?, ?, ?)",
+        (job_id, name, value, text_value, pass_fail, now_iso())
+    )
+
+
+if __name__ == "__main__":
+    p = init_db()
+    print(f"DB initialized: {p}")
--- a/pipeline/pyproject.toml
+++ b/pipeline/pyproject.toml
@@ -0,0 +1,21 @@
+[project]
+name = "cosma-pipeline"
+version = "0.1.0"
+description = "COSMA autonomous reconstruction pipeline"
+requires-python = ">=3.11"
+dependencies = [
+  "pandas>=2.0",
+  "scipy>=1.11",
+  "numpy>=1.26",
+  "fastapi>=0.115",
+  "uvicorn[standard]>=0.30",
+  "sqlmodel>=0.0.18",
+  "pyyaml>=6.0",
+  "tqdm>=4.66",
+  "open3d>=0.18",
+  "mcap>=1.1",
+  "mcap-ros2-support>=0.5",
+]
+
+[tool.uv]
+package = false
--- a/pipeline/qa/init.py
+++ b/pipeline/qa/init.py
--- a/pipeline/qa/checks.py
+++ b/pipeline/qa/checks.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""QA checks — each function returns {metric: value, pass_fail: str, details: str}."""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+
+def check_ingest(manifest_path: Path) -> dict:
+    try:
+        m = json.loads(manifest_path.read_text())
+        n_auv_video = len(m.get("auv_ids_video", []))
+        n_auv_bags  = len(m.get("auv_ids_bags", []))
+        total_s = m.get("total_video_s", 0)
+        segs = sum(len(v) for v in m.get("segments_per_auv", {}).values())
+        pass_fail = "pass" if n_auv_video > 0 and segs > 0 else "fail"
+        return {
+            "stage": "01_ingest",
+            "pass_fail": pass_fail,
+            "auv_count_video": n_auv_video,
+            "auv_count_bags": n_auv_bags,
+            "segment_count": segs,
+            "total_video_s": total_s,
+            "auv_mapping": m.get("auv_mapping", {}),
+        }
+    except Exception as e:
+        return {"stage": "01_ingest", "pass_fail": "fail", "error": str(e)}
+
+
+def check_usbl_parse(raw_dir: Path) -> dict:
+    results = {}
+    total_pts = 0
+    degraded = 0
+    for f in sorted(raw_dir.glob("*_nav_raw.json")):
+        try:
+            d = json.loads(f.read_text())
+            pts = len(d.get("points", []))
+            status = d.get("metrics", {}).get("status", "?")
+            total_pts += pts
+            if status == "degraded":
+                degraded += 1
+            results[d.get("auv_id", f.stem)] = {"points": pts, "status": status}
+        except Exception as e:
+            results[f.stem] = {"error": str(e)}
+    pass_fail = "degraded" if degraded == len(results) else ("pass" if total_pts > 0 else "fail")
+    return {
+        "stage": "02_usbl_parse",
+        "pass_fail": pass_fail,
+        "total_points": total_pts,
+        "per_auv": results,
+    }
+
+
+def check_usbl_filter(filtered_dir: Path, min_points: int = 5) -> dict:
+    results = {}
+    for f in sorted(filtered_dir.glob("*_nav_filtered.json")):
+        try:
+            d = json.loads(f.read_text())
+            pts_after = len(d.get("points", []))
+            m = d.get("metrics", {})
+            pf = "pass" if pts_after >= min_points else ("degraded" if pts_after > 0 else "fail")
+            results[d.get("auv_id", f.stem)] = {
+                "before": m.get("points_before", 0),
+                "after": pts_after,
+                "removed_null": m.get("points_removed_null", 0),
+                "removed_outlier": m.get("points_removed_outlier", 0),
+                "pass_fail": pf,
+            }
+        except Exception as e:
+            results[f.stem] = {"error": str(e)}
+    overall = "pass"
+    if all(v.get("pass_fail") == "fail" for v in results.values() if "error" not in v):
+        overall = "fail"
+    elif any(v.get("pass_fail") == "degraded" for v in results.values() if "error" not in v):
+        overall = "degraded"
+    return {"stage": "03_usbl_filter", "pass_fail": overall, "per_auv": results}
--- a/pipeline/run_pipeline.sh
+++ b/pipeline/run_pipeline.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Run full pipeline for a mission: stages 02→03→04→05
+# Usage: ./run_pipeline.sh <mission> [worker]
+# Example: ./run_pipeline.sh 20260505-Lepradet auto
+
+set -euo pipefail
+
+MISSION=${1:-20260505-Lepradet}
+WORKER=${2:-auto}
+MANIFEST="/home/cosma/cosma-pipeline/${MISSION}/manifest.json"
+PIPELINE_DIR="$(cd "$(dirname "$0")" && pwd)/stages"
+PIPELINE_BASE="/home/cosma/cosma-pipeline"
+NAV_DIR="${PIPELINE_BASE}/data/${MISSION}/nav"
+NAV_FILT_DIR="${PIPELINE_BASE}/data/${MISSION}/nav_filtered"
+FRAMES_DIR="${PIPELINE_BASE}/data/${MISSION}/frames"
+
+RUN_ID="$(date +%Y%m%d_%H%M%S)"
+RUN_LOG_DIR="${PIPELINE_BASE}/runs/${RUN_ID}"
+mkdir -p "${RUN_LOG_DIR}"
+
+echo "=== Pipeline run ${RUN_ID} mission=${MISSION} worker=${WORKER} ===" | tee "${RUN_LOG_DIR}/run.log"
+echo "Start: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | tee -a "${RUN_LOG_DIR}/run.log"
+
+# Stage 02: nav parse
+echo "" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "--- Stage 02: nav parse ---" | tee -a "${RUN_LOG_DIR}/run.log"
+python3 "${PIPELINE_DIR}/02_nav_parse.py" "${MANIFEST}" \
+    2>&1 | tee -a "${RUN_LOG_DIR}/stage02.log" "${RUN_LOG_DIR}/run.log"
+
+# Stage 03: nav filter
+echo "" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "--- Stage 03: nav filter ---" | tee -a "${RUN_LOG_DIR}/run.log"
+python3 "${PIPELINE_DIR}/03_nav_filter.py" "${NAV_DIR}" \
+    2>&1 | tee -a "${RUN_LOG_DIR}/stage03.log" "${RUN_LOG_DIR}/run.log"
+
+# QC threshold: lowered from 50 to 30 (avg bottom_visible=37.5%)
+export COSMA_QC_BOTTOM_OK_PCT=30
+
+# Stage 04: frame extract
+echo "" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "--- Stage 04: frame extract ---" | tee -a "${RUN_LOG_DIR}/run.log"
+python3 "${PIPELINE_DIR}/04_frame_extract.py" --mission "${MISSION}" \
+    2>&1 | tee -a "${RUN_LOG_DIR}/stage04.log" "${RUN_LOG_DIR}/run.log"
+
+# Stage 05: inference (sequential, one segment at a time)
+echo "" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "--- Stage 05: inference ---" | tee -a "${RUN_LOG_DIR}/run.log"
+python3 "${PIPELINE_DIR}/05_inference.py" \
+    --frames-dir "${FRAMES_DIR}" \
+    --worker "${WORKER}" \
+    --mission "${MISSION}" \
+    2>&1 | tee -a "${RUN_LOG_DIR}/stage05.log" "${RUN_LOG_DIR}/run.log"
+
+echo "" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "=== Pipeline DONE $(date -u +%Y-%m-%dT%H:%M:%SZ) ===" | tee -a "${RUN_LOG_DIR}/run.log"
+echo "Logs: ${RUN_LOG_DIR}/"
--- a/pipeline/stages/01_ingest.py
+++ b/pipeline/stages/01_ingest.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python3
+"""Stage 01 — Ingest mission folder from SSD.
+
+Scans /mnt/ssd/<mission>/raw_data/ and builds a manifest:
+- Videos per AUV+GoPro segment (from medias/videos/)
+- USBL/bag sessions per AUV (from logs/SUB/bag/*.mcap)
+- Auto-detects AUV ID mapping (AUV010↔AUV210 etc.) by timestamp proximity
+
+Usage:
+    python3 01_ingest.py /mnt/ssd/20260505-Lepradet --name 20260505-Lepradet
+    python3 01_ingest.py /mnt/ssd/20260505-Lepradet --name 20260505-Lepradet --out /home/cosma/cosma-pipeline
+"""
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime, timedelta
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_mission, now_iso
+
+LOG_DIR = Path(os.environ.get("COSMA_PIPELINE_LOGS", "/home/cosma/cosma-pipeline/logs"))
+LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+# AUV ID normalization: GP folder uses AUV2xx, bag files use AUV0xx
+# e.g. AUV210 <-> AUV010, AUV213 <-> AUV013, AUV212 <-> AUV012
+AUV_FOLDER_RE = re.compile(r"GP(\d+)[_-]AUV(\d+)", re.I)
+AUV_BAG_RE    = re.compile(r"auv(\d+)", re.I)
+AUV_CSV_RE    = re.compile(r"AUV(\d+)", re.I)
+
+
+def normalize_auv(raw_id: str) -> str:
+    """Normalize AUV IDs: strip leading zeros or map 0xx -> 2xx heuristic.
+    Returns canonical form like AUV010, AUV013, AUV210, AUV212 as-is.
+    We keep original for now and detect mapping via timestamp cross-correlation.
+    """
+    m = re.search(r"\d+", raw_id)
+    if not m:
+        return raw_id.upper()
+    n = int(m.group())
+    return f"AUV{n:03d}"
+
+
+def exif_create_date(path: Path) -> datetime | None:
+    try:
+        out = subprocess.check_output(
+            ["exiftool", "-s3", "-CreateDate", "-api", "QuickTimeUTC=1", str(path)],
+            stderr=subprocess.DEVNULL, text=True, timeout=10
+        ).strip()
+        if not out:
+            return None
+        out = re.sub(r'[+-]\d{2}:\d{2}$', '', out).strip()
+        return datetime.strptime(out, "%Y:%m:%d %H:%M:%S")
+    except Exception:
+        return None
+
+
+def exif_duration_s(path: Path) -> float | None:
+    try:
+        out = subprocess.check_output(
+            ["exiftool", "-s3", "-Duration#", str(path)],
+            stderr=subprocess.DEVNULL, text=True, timeout=10
+        ).strip()
+        return float(out) if out else None
+    except Exception:
+        return None
+
+
+def scan_videos(raw_data: Path) -> dict[str, list[dict]]:
+    """Scan medias/videos/ and return dict {auv_id: [video_info, ...]}.
+    Handles both GP1-AUV210 and GP1_AUV210 naming conventions.
+    """
+    videos_dir = raw_data / "medias" / "videos"
+    if not videos_dir.exists():
+        return {}
+
+    result: dict[str, list[dict]] = {}
+    for folder in sorted(videos_dir.iterdir()):
+        if not folder.is_dir():
+            continue
+        m = AUV_FOLDER_RE.search(folder.name)
+        if not m:
+            continue
+        gopro_n = int(m.group(1))
+        auv_id = normalize_auv(m.group(2))
+
+        mp4_files = sorted(folder.glob("*.MP4")) + sorted(folder.glob("*.mp4"))
+        for mp4 in mp4_files:
+            create_date = exif_create_date(mp4)
+            duration = exif_duration_s(mp4)
+            info = {
+                "path": str(mp4),
+                "gopro": gopro_n,
+                "auv_id": auv_id,
+                "filename": mp4.name,
+                "create_date": create_date.isoformat() if create_date else None,
+                "duration_s": duration,
+                "size_mb": round(mp4.stat().st_size / 1e6, 1),
+            }
+            result.setdefault(auv_id, []).append(info)
+
+    return result
+
+
+def scan_bags(raw_data: Path) -> dict[str, list[dict]]:
+    """Scan logs/SUB/bag/ for MCAP files grouped by session+AUV."""
+    bag_dir = raw_data / "logs" / "SUB" / "bag"
+    if not bag_dir.exists():
+        return {}
+
+    result: dict[str, list[dict]] = {}
+    for session_dir in sorted(bag_dir.iterdir()):
+        if not session_dir.is_dir():
+            continue
+        # dir name: 20260505_074718_AUV013
+        m = re.match(r"(\d{8}_\d{6})_AUV(\d+)", session_dir.name)
+        if not m:
+            continue
+        ts_str = m.group(1)
+        auv_id = normalize_auv(m.group(2))
+        try:
+            ts = datetime.strptime(ts_str, "%Y%m%d_%H%M%S")
+        except ValueError:
+            ts = None
+
+        mcap_files = sorted(session_dir.glob("*.mcap"))
+        total_size = sum(f.stat().st_size for f in mcap_files)
+        non_empty = [str(f) for f in mcap_files if f.stat().st_size > 0]
+
+        if not non_empty:
+            continue
+
+        session_info = {
+            "session": session_dir.name,
+            "auv_id": auv_id,
+            "timestamp": ts.isoformat() if ts else None,
+            "mcap_files": non_empty,
+            "total_mb": round(total_size / 1e6, 1),
+        }
+        result.setdefault(auv_id, []).append(session_info)
+
+    return result
+
+
+def scan_usbl_csv(raw_data: Path) -> dict[str, list[dict]]:
+    """Scan logs/SUB/log/ for *_usbl.csv files.
+    Note: these are raw serial byte logs, not lat/lon CSV.
+    We record them for reference; actual nav comes from MCAP bags.
+    """
+    log_dir = raw_data / "logs" / "SUB" / "log"
+    if not log_dir.exists():
+        return {}
+
+    result: dict[str, list[dict]] = {}
+    for f in sorted(log_dir.glob("*_usbl.csv")):
+        m = AUV_CSV_RE.search(f.name)
+        if not m:
+            continue
+        auv_id = normalize_auv(m.group(1))
+        # parse timestamp from filename: 2026-05-05_08-16-00_AUV010_usbl.csv
+        ts_m = re.match(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})", f.name)
+        ts = None
+        if ts_m:
+            try:
+                ts = datetime.strptime(ts_m.group(1), "%Y-%m-%d_%H-%M-%S")
+            except ValueError:
+                pass
+        result.setdefault(auv_id, []).append({
+            "path": str(f),
+            "auv_id": auv_id,
+            "timestamp": ts.isoformat() if ts else None,
+            "size_kb": round(f.stat().st_size / 1e3, 1),
+            "format": "raw_serial",  # NOT lat/lon CSV
+        })
+
+    return result
+
+
+def detect_auv_mapping(videos: dict, bags: dict) -> dict[str, str]:
+    """Detect AUV ID mapping between video folders (AUV2xx) and bag sessions (AUV0xx).
+
+    Heuristic: if video AUV2xx and bag AUV0xx share same last 2 digits and
+    timestamps are within 60s → they are the same physical AUV.
+
+    Returns: {video_auv_id: bag_auv_id} e.g. {"AUV210": "AUV010"}
+    """
+    mapping: dict[str, str] = {}
+
+    for vid_auv in videos:
+        vid_ts_list = []
+        for v in videos[vid_auv]:
+            if v.get("create_date"):
+                try:
+                    vid_ts_list.append(datetime.fromisoformat(v["create_date"]))
+                except Exception:
+                    pass
+        if not vid_ts_list:
+            continue
+
+        vid_digits = vid_auv[-2:]  # last 2 digits of AUV2xx
+
+        best_bag_auv = None
+        best_delta = timedelta(seconds=999)
+
+        for bag_auv in bags:
+            # check same last 2 digits
+            if bag_auv[-2:] != vid_digits:
+                continue
+            for sess in bags[bag_auv]:
+                if sess.get("timestamp"):
+                    try:
+                        bag_ts = datetime.fromisoformat(sess["timestamp"])
+                    except Exception:
+                        continue
+                    for vt in vid_ts_list:
+                        delta = abs(vt - bag_ts)
+                        if delta < best_delta:
+                            best_delta = delta
+                            best_bag_auv = bag_auv
+
+        if best_bag_auv and best_delta < timedelta(seconds=3600):
+            mapping[vid_auv] = best_bag_auv
+
+    return mapping
+
+
+def group_video_segments(video_list: list[dict], gap_min: int = 5) -> list[dict]:
+    """Group consecutive videos into segments by timestamp gap."""
+    sorted_vids = sorted(
+        [v for v in video_list if v.get("create_date")],
+        key=lambda x: x["create_date"]
+    )
+    if not sorted_vids:
+        return [{"videos": video_list, "label": "seg_unknown", "total_s": None}]
+
+    segments = []
+    current: list[dict] = [sorted_vids[0]]
+
+    for vid in sorted_vids[1:]:
+        prev = current[-1]
+        try:
+            prev_end = datetime.fromisoformat(prev["create_date"])
+            if prev.get("duration_s"):
+                prev_end += timedelta(seconds=prev["duration_s"])
+            cur_start = datetime.fromisoformat(vid["create_date"])
+            gap = (cur_start - prev_end).total_seconds() / 60
+            if gap > gap_min:
+                segments.append(_finalize_segment(current))
+                current = [vid]
+            else:
+                current.append(vid)
+        except Exception:
+            current.append(vid)
+
+    if current:
+        segments.append(_finalize_segment(current))
+
+    return segments
+
+
+def _finalize_segment(videos: list[dict]) -> dict:
+    label = videos[0]["create_date"][:19].replace(":", "-").replace("T", "_") if videos[0].get("create_date") else "seg_unknown"
+    total_s = sum(v["duration_s"] or 0 for v in videos)
+    return {
+        "label": label,
+        "videos": videos,
+        "total_s": total_s,
+        "start": videos[0].get("create_date"),
+        "end": videos[-1].get("create_date"),
+    }
+
+
+def build_manifest(mission_path: Path, gap_min: int = 5) -> dict:
+    raw_data = mission_path / "raw_data"
+    if not raw_data.exists():
+        # try direct
+        raw_data = mission_path
+
+    print(f"[01_ingest] scanning {raw_data} ...")
+
+    videos = scan_videos(raw_data)
+    bags   = scan_bags(raw_data)
+    csvs   = scan_usbl_csv(raw_data)
+    auv_map = detect_auv_mapping(videos, bags)
+
+    # Build per-AUV segments
+    auv_segments: dict[str, list[dict]] = {}
+    for auv_id, vid_list in videos.items():
+        segs = group_video_segments(vid_list, gap_min=gap_min)
+        auv_segments[auv_id] = segs
+
+    # Compute AUVs with real data
+    auv_ids_with_video = sorted(videos.keys())
+    auv_ids_with_bags  = sorted(bags.keys())
+
+    total_video_s = sum(
+        seg["total_s"] or 0
+        for segs in auv_segments.values()
+        for seg in segs
+    )
+
+    manifest = {
+        "mission": mission_path.name,
+        "ssd_path": str(mission_path),
+        "generated_at": now_iso(),
+        "auv_ids_video": auv_ids_with_video,
+        "auv_ids_bags": auv_ids_with_bags,
+        "auv_mapping": auv_map,
+        "total_video_s": round(total_video_s),
+        "segments_per_auv": auv_segments,
+        "bag_sessions_per_auv": bags,
+        "usbl_csv_per_auv": csvs,
+        "notes": {
+            "usbl_csv_format": "raw_serial_bytes",
+            "nav_source": "mcap_bags",
+        },
+    }
+
+    return manifest
+
+
+def ingest(mission_path: Path, mission_name: str, out_dir: Path,
+           gap_min: int = 5) -> dict:
+    out_dir.mkdir(parents=True, exist_ok=True)
+    manifest_path = out_dir / mission_name / "manifest.json"
+
+    # Idempotency check
+    if manifest_path.exists():
+        existing = json.loads(manifest_path.read_text())
+        chk = hashlib.sha256(mission_path.name.encode()).hexdigest()[:8]
+        print(f"[01_ingest] manifest exists (checksum {chk}), skipping scan")
+        return existing
+
+    manifest = build_manifest(mission_path, gap_min=gap_min)
+
+    # Save manifest
+    manifest_path.parent.mkdir(parents=True, exist_ok=True)
+    manifest_path.write_text(json.dumps(manifest, indent=2))
+    print(f"[01_ingest] manifest saved: {manifest_path}")
+
+    # Write to DB
+    init_db()
+    with get_conn() as conn:
+        upsert_mission(conn, mission_name, str(mission_path),
+                       status="ingested", manifest=json.dumps(manifest))
+
+    return manifest
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 01 — Ingest mission from SSD")
+    ap.add_argument("mission_path", type=Path, help="Path to mission folder (e.g. /mnt/ssd/20260505-Lepradet)")
+    ap.add_argument("--name", type=str, default=None, help="Mission name (defaults to folder name)")
+    ap.add_argument("--out", type=Path, default=Path("/home/cosma/cosma-pipeline"), help="Output base dir")
+    ap.add_argument("--gap-min", type=int, default=5, help="Gap in minutes to split video segments")
+    args = ap.parse_args()
+
+    mission_name = args.name or args.mission_path.name
+    manifest = ingest(args.mission_path, mission_name, args.out, args.gap_min)
+
+    print(f"\n=== Ingest summary for {mission_name} ===")
+    print(f"AUVs with video: {manifest['auv_ids_video']}")
+    print(f"AUVs with bags:  {manifest['auv_ids_bags']}")
+    print(f"AUV mapping:     {manifest['auv_mapping']}")
+    print(f"Total video:     {manifest['total_video_s']}s")
+    print(f"Segments:")
+    for auv, segs in manifest["segments_per_auv"].items():
+        for seg in segs:
+            n_vids = len(seg["videos"])
+            dur = f"{seg['total_s']:.0f}s" if seg["total_s"] else "?"
+            print(f"  {auv} / {seg['label']}  {n_vids} videos  {dur}")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/01_select_mission.py
+++ b/pipeline/stages/01_select_mission.py
@@ -0,0 +1,850 @@
+#!/usr/bin/env python3
+"""Stage 01 — Select mission and produce a raw manifest.
+
+Scans `<ssd_base>/<mission>/raw_data/` and writes
+`<out>/<mission>/01_manifest.json` listing all AUVs, GoPro folders, MCAP bags,
+BIN files, USBL logs.
+
+Usage:
+    python3 01_select_mission.py \
+        --mission 20260505-Lepradet \
+        --ssd-base /mnt/ssd \
+        --out /home/cosma/cosma-qc/data
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timezone, timedelta
+from pathlib import Path
+
+# Local helper for stage time/memory tracking
+sys.path.insert(0, str(Path(__file__).parent))
+from _meta import track_stage  # noqa: E402
+
+# AUV physical (GoPro) and AUV MCAP IDs — pattern matching only, never write to SSD.
+AUV_PHYS_RE = re.compile(r"AUV(\d{3})", re.I)
+GP_FOLDER_RE = re.compile(r"^GP(?P<gp>\d+)[_-]AUV(?P<auv>\d{3})$", re.I)
+BAG_AUV_RE = re.compile(r"_AUV(?P<auv>\d{3})(?:[_/]|$)", re.I)
+USBL_FILE_RE = re.compile(r"usbl", re.I)
+KLF_TS_RE = re.compile(r"(\d{8})_(\d{6})")
+MISSION_DATE_RE = re.compile(r"^(\d{8})-")
+
+# KLF throughput estimate: ~5MB/min
+KLF_BYTES_PER_SEC = 5 * 1024 * 1024 / 60
+
+
+def iso_utc_now() -> str:
+    return datetime.now(timezone.utc).isoformat(timespec="seconds")
+
+
+def _is_old_path(path: Path) -> bool:
+    """Return True if path contains an 'old' component (skip these files)."""
+    return "old" in path.parts
+
+
+def _normalize_auv_id(auv_str: str) -> str:
+    """AUV0xx -> AUV2xx; AUV2xx unchanged; others unchanged."""
+    m = re.fullmatch(r"AUV(\d{3})", auv_str, re.I)
+    if not m:
+        return auv_str
+    n = int(m.group(1))
+    if 0 <= n < 100:
+        return f"AUV2{n:02d}"  # AUV010 -> AUV210
+    return f"AUV{n:03d}"
+
+
+def _parse_mission_date(mission: str) -> "datetime | None":
+    """Parse YYYYMMDD from mission folder name. Returns UTC midnight or None."""
+    m = MISSION_DATE_RE.match(mission)
+    if not m:
+        print(f"  [warn] mission_date: cannot parse date from '{mission}', no date filter")
+        return None
+    try:
+        return datetime.strptime(m.group(1), "%Y%m%d").replace(tzinfo=timezone.utc)
+    except ValueError:
+        print(f"  [warn] mission_date: invalid date '{m.group(1)}' in '{mission}'")
+        return None
+
+
+def _mission_date_window(mission_date: datetime) -> "tuple[datetime, datetime]":
+    """Return [date-2h, date+26h] UTC window with +/-2h timezone tolerance."""
+    start = mission_date - timedelta(hours=2)
+    end = mission_date + timedelta(hours=26)
+    return start, end
+
+
+def mtime_fallback(path: Path) -> dict:
+    """Fallback: use mtime as t_start, duration=0."""
+    try:
+        mt = path.stat().st_mtime
+        t = datetime.fromtimestamp(mt, tz=timezone.utc)
+        return {
+            "t_start": t.isoformat(timespec="seconds"),
+            "t_end": t.isoformat(timespec="seconds"),
+            "duration_s": 0,
+            "source": "mtime_fallback",
+        }
+    except OSError:
+        return None
+
+
+def parse_smpte(smpte: str, fps: float = 25.0) -> float:
+    """SMPTE HH:MM:SS:FF -> seconds since midnight."""
+    parts = smpte.split(":")
+    if len(parts) != 4:
+        return None
+    h, m, s, f = int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3])
+    return h * 3600 + m * 60 + s + f / fps
+
+
+def extract_mp4(path: Path) -> "dict | None":
+    """Extract timestamps from GoPro MP4 via ffprobe."""
+    try:
+        out = subprocess.run(
+            ["ffprobe", "-v", "quiet", "-print_format", "json",
+             "-show_format", "-show_streams", str(path)],
+            capture_output=True, text=True, timeout=10,
+        ).stdout
+        if not out:
+            return mtime_fallback(path)
+        data = json.loads(out)
+        fmt = data.get("format", {})
+        duration = float(fmt.get("duration", 0) or 0)
+
+        # Try SMPTE timecode first
+        smpte = None
+        for s in data.get("streams", []):
+            tc = s.get("tags", {}).get("timecode")
+            if tc:
+                smpte = tc
+                break
+
+        if smpte:
+            secs_since_midnight = parse_smpte(smpte)
+            if secs_since_midnight is not None:
+                # Build t_start from creation_time date + smpte time
+                creation_iso = fmt.get("tags", {}).get("creation_time", "")
+                try:
+                    date_part = datetime.fromisoformat(
+                        creation_iso.replace("Z", "+00:00")
+                    ).date()
+                    midnight = datetime(
+                        date_part.year, date_part.month, date_part.day,
+                        tzinfo=timezone.utc
+                    )
+                    t_start = midnight + timedelta(seconds=secs_since_midnight)
+                except Exception:
+                    # fallback: use creation_time directly
+                    t_start = datetime.fromisoformat(
+                        creation_iso.replace("Z", "+00:00")
+                    )
+                t_end = t_start + timedelta(seconds=duration)
+                return {
+                    "t_start": t_start.isoformat(timespec="seconds"),
+                    "t_end": t_end.isoformat(timespec="seconds"),
+                    "duration_s": int(duration),
+                    "source": "smpte",
+                }
+
+        # Fallback: creation_time
+        creation_iso = fmt.get("tags", {}).get("creation_time", "")
+        if creation_iso:
+            t_start = datetime.fromisoformat(creation_iso.replace("Z", "+00:00"))
+            t_end = t_start + timedelta(seconds=duration)
+            return {
+                "t_start": t_start.isoformat(timespec="seconds"),
+                "t_end": t_end.isoformat(timespec="seconds"),
+                "duration_s": int(duration),
+                "source": "creation_time",
+            }
+    except Exception:
+        pass
+    return mtime_fallback(path)
+
+
+def extract_mcap(path: Path) -> "dict | None":
+    """Extract timestamps from MCAP bag via mcap.reader."""
+    try:
+        from mcap.reader import make_reader
+        with open(path, "rb") as f:
+            reader = make_reader(f)
+            summary = reader.get_summary()
+            if summary and summary.statistics:
+                start_ns = summary.statistics.message_start_time
+                end_ns = summary.statistics.message_end_time
+                if start_ns and end_ns:
+                    t_start = datetime.fromtimestamp(start_ns / 1e9, tz=timezone.utc)
+                    t_end = datetime.fromtimestamp(end_ns / 1e9, tz=timezone.utc)
+                    dur = int((end_ns - start_ns) / 1e9)
+                    return {
+                        "t_start": t_start.isoformat(timespec="seconds"),
+                        "t_end": t_end.isoformat(timespec="seconds"),
+                        "duration_s": dur,
+                        "source": "mcap_summary",
+                    }
+    except Exception:
+        pass
+    return mtime_fallback(path)
+
+
+def extract_bin(path: Path) -> "dict | None":
+    """BIN ArduSub: no absolute timestamp available (TimeUS = boot-relative).
+    Use mtime fallback."""
+    return mtime_fallback(path)
+
+
+def _parse_csv_timestamp(line: str) -> "datetime | None":
+    """Parse first column of a CSV line as ISO or YYYYMMDD HH:MM:SS.mmm."""
+    col = line.split(",")[0].strip().strip('"')
+    # Try ISO format (2026-05-08 05:46:29.384209)
+    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S.%f",
+                "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%d %H:%M:%S"):
+        try:
+            dt = datetime.strptime(col, fmt)
+            return dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            pass
+    # Try COSMA MAG format: '20260508 07:52:28.456'
+    try:
+        dt = datetime.strptime(col, "%Y%m%d %H:%M:%S.%f")
+        return dt.replace(tzinfo=timezone.utc)
+    except ValueError:
+        pass
+    # Try float epoch
+    try:
+        return datetime.fromtimestamp(float(col), tz=timezone.utc)
+    except (ValueError, OSError):
+        pass
+    return None
+
+
+def extract_csv(path: Path) -> "dict | None":
+    """Extract timestamps from CSV (USBL/MAG) via head+tail."""
+    try:
+        result = subprocess.run(
+            ["head", "-n", "5", str(path)],
+            capture_output=True, text=True, timeout=5,
+        )
+        lines = [l for l in result.stdout.splitlines() if l.strip()]
+
+        tail_result = subprocess.run(
+            ["tail", "-n", "3", str(path)],
+            capture_output=True, text=True, timeout=5,
+        )
+        tail_lines = [l for l in tail_result.stdout.splitlines() if l.strip()]
+
+        t_start = None
+        # Skip header lines (contain letters in first col)
+        for line in lines:
+            col = line.split(",")[0].strip()
+            if not col or col[0].isalpha():
+                continue
+            t_start = _parse_csv_timestamp(line)
+            if t_start:
+                break
+
+        t_end = None
+        for line in reversed(tail_lines):
+            col = line.split(",")[0].strip()
+            if not col or col[0].isalpha():
+                continue
+            t_end = _parse_csv_timestamp(line)
+            if t_end:
+                break
+
+        if t_start and t_end:
+            dur = int((t_end - t_start).total_seconds())
+            return {
+                "t_start": t_start.isoformat(timespec="seconds"),
+                "t_end": t_end.isoformat(timespec="seconds"),
+                "duration_s": max(0, dur),
+                "source": "csv_inline",
+            }
+    except Exception:
+        pass
+    return mtime_fallback(path)
+
+
+def extract_klf(path: Path) -> "dict | None":
+    """Extract timestamp from KLF Kogger filename: kogger_sss_YYYYMMDD_HHMMSS.klf"""
+    try:
+        m = KLF_TS_RE.search(path.name)
+        if m:
+            dt = datetime.strptime(m.group(1) + m.group(2), "%Y%m%d%H%M%S")
+            t_start = dt.replace(tzinfo=timezone.utc)
+            # Estimate duration from file size
+            try:
+                size = path.stat().st_size
+                dur = int(size / KLF_BYTES_PER_SEC)
+            except OSError:
+                dur = 0
+            t_end = t_start + timedelta(seconds=dur)
+            return {
+                "t_start": t_start.isoformat(timespec="seconds"),
+                "t_end": t_end.isoformat(timespec="seconds"),
+                "duration_s": dur,
+                "source": "filename_parse",
+            }
+    except Exception:
+        pass
+    return mtime_fallback(path)
+
+
+def extract_timestamps(path: Path, kind: str) -> "dict | None":
+    """Dispatch timestamp extraction by file kind."""
+    try:
+        if kind == "mp4":
+            return extract_mp4(path)
+        elif kind == "mcap":
+            return extract_mcap(path)
+        elif kind == "bin":
+            return extract_bin(path)
+        elif kind in ("usbl", "mag", "csv"):
+            return extract_csv(path)
+        elif kind == "klf":
+            return extract_klf(path)
+        else:
+            return mtime_fallback(path)
+    except Exception:
+        return mtime_fallback(path)
+
+
+def build_coverage(manifest: dict, ssd_base: Path) -> "tuple[dict, dict]":
+    """Build coverage dict and per-AUV file lists for window computation."""
+    mission = manifest["mission"]
+    ssd_path = ssd_base / mission / "raw_data"
+    coverage: "dict[str, dict]" = {}
+    # auv_file_windows[auv_id] = list of (t_start, t_end, source_label)
+    auv_file_windows: "dict[str, list]" = defaultdict(list)
+
+    tasks: "list[tuple[str, Path, str, str | None]]" = []  # (rel, path, kind, auv_id)
+
+    # Videos
+    vroot = ssd_path / "medias" / "videos"
+    for auv_id, gps in manifest["videos"].items():
+        for gp_key, files in gps.items():
+            for fname in files:
+                # find the file
+                for sub in (vroot,):
+                    for folder in sub.iterdir() if sub.exists() else []:
+                        if not folder.is_dir():
+                            continue
+                        p = folder / fname
+                        if p.exists():
+                            rel = str(p.relative_to(ssd_path))
+                            tasks.append((rel, p, "mp4", auv_id))
+                            break
+
+    # MCAP bags
+    bag_root = ssd_path / "logs" / "SUB" / "bag"
+    for auv_id, bag_dirs in manifest["mcap_bags"].items():
+        for bag_dir in bag_dirs:
+            bag_path = bag_root / bag_dir
+            if bag_path.exists():
+                for mcap_file in sorted(bag_path.glob("*.mcap")):
+                    rel = str(mcap_file.relative_to(ssd_path))
+                    tasks.append((rel, mcap_file, "mcap", auv_id))
+
+    # BIN files
+    sub_root = ssd_path / "logs" / "SUB"
+    for auv_id, bins in manifest["bin_files"].items():
+        for bname in bins:
+            for p in sub_root.rglob(bname):
+                if p.is_file():
+                    rel = str(p.relative_to(ssd_path))
+                    tasks.append((rel, p, "bin", auv_id))
+                    break
+
+    # USBL logs
+    for rel_str in manifest["usbl_logs"]:
+        p = ssd_path / rel_str
+        if p.exists():
+            # try to infer AUV from path
+            m = AUV_PHYS_RE.search(rel_str)
+            auv_id = _normalize_auv_id(f"AUV{int(m.group(1)):03d}") if m else None
+            tasks.append((rel_str, p, "usbl", auv_id))
+
+    # MAG files
+    for category, files in manifest["mag_files"].items():
+        for rel_str in files:
+            p = ssd_path / rel_str
+            if p.exists():
+                tasks.append((rel_str, p, "mag", None))
+
+    # SSS KLF files
+    for rel_str in manifest["sss_files"].get("klf", []):
+        p = ssd_path / rel_str
+        if p.exists():
+            tasks.append((rel_str, p, "klf", None))
+
+    # Parallel extraction
+    def _extract(task):
+        rel, path, kind, auv_id = task
+        cov = extract_timestamps(path, kind)
+        return rel, cov, auv_id
+
+    with ThreadPoolExecutor(max_workers=8) as ex:
+        futures = {ex.submit(_extract, t): t for t in tasks}
+        for fut in as_completed(futures):
+            rel, cov, auv_id = fut.result()
+            if cov:
+                coverage[rel] = cov
+                if auv_id and cov.get("source") != "mtime_fallback":
+                    auv_file_windows[auv_id].append((
+                        cov["t_start"], cov["t_end"], rel
+                    ))
+
+    return coverage, dict(auv_file_windows)
+
+
+def compute_mission_window(coverage: dict) -> "dict | None":
+    """Global mission window: min t_start, max t_end over all files."""
+    starts = []
+    ends = []
+    for cov in coverage.values():
+        if cov.get("source") == "mtime_fallback":
+            continue
+        try:
+            starts.append(datetime.fromisoformat(cov["t_start"]))
+            ends.append(datetime.fromisoformat(cov["t_end"]))
+        except Exception:
+            pass
+    if not starts:
+        return None
+    t_start = min(starts)
+    t_end = max(ends)
+    return {
+        "t_start": t_start.isoformat(timespec="seconds"),
+        "t_end": t_end.isoformat(timespec="seconds"),
+        "duration_s": int((t_end - t_start).total_seconds()),
+    }
+
+
+def compute_auv_windows(auv_file_windows: dict) -> dict:
+    """Per-AUV windows with gap detection (>60s gap between sorted windows)."""
+    result = {}
+    for auv_id, windows in sorted(auv_file_windows.items()):
+        # Sort by t_start
+        sorted_wins = sorted(windows, key=lambda x: x[0])
+        starts = [datetime.fromisoformat(w[0]) for w in sorted_wins]
+        ends = [datetime.fromisoformat(w[1]) for w in sorted_wins]
+
+        t_start = min(starts)
+        t_end = max(ends)
+        dur = int((t_end - t_start).total_seconds())
+
+        # Detect gaps: merge overlapping windows first, then find gaps
+        gaps = []
+        # Build merged intervals
+        intervals = sorted(zip(starts, ends, [w[2] for w in sorted_wins]))
+        cur_start, cur_end, cur_label = intervals[0]
+        prev_label = cur_label
+        for i_start, i_end, i_label in intervals[1:]:
+            if i_start - cur_end > timedelta(seconds=60):
+                gaps.append({
+                    "from": cur_end.isoformat(timespec="seconds"),
+                    "to": i_start.isoformat(timespec="seconds"),
+                    "duration_s": int((i_start - cur_end).total_seconds()),
+                    "between": [prev_label, i_label],
+                })
+            if i_end > cur_end:
+                cur_end = i_end
+                prev_label = i_label
+
+        # Collect source categories
+        sources = sorted(set(
+            "videos" if w[2].endswith(".MP4") or w[2].endswith(".mp4") else
+            "mcap" if w[2].endswith(".mcap") else
+            "bin" if w[2].endswith(".BIN") or w[2].endswith(".bin") else
+            "usbl" if "usbl" in w[2].lower() else
+            "other"
+            for w in sorted_wins
+        ))
+
+        result[auv_id] = {
+            "t_start": t_start.isoformat(timespec="seconds"),
+            "t_end": t_end.isoformat(timespec="seconds"),
+            "duration_s": dur,
+            "sources": sources,
+            "gaps": gaps,
+        }
+    return result
+
+
+def safe_listdir(p: Path) -> "list[Path]":
+    if not p.exists() or not p.is_dir():
+        return []
+    try:
+        return sorted(p.iterdir())
+    except OSError:
+        return []
+
+
+def collect_videos(ssd_path: Path) -> "tuple[dict, int, float]":
+    """videos[AUV][GP1|GP2] -> sorted list of MP4 filenames. Two layouts tried."""
+    videos = defaultdict(lambda: defaultdict(list))
+    total_n = 0
+    total_bytes = 0
+
+    # Layout A: medias/videos/GP{n}{_|-}AUV{xxx}/*.MP4
+    vroot_a = ssd_path / "medias" / "videos"
+    for sub in safe_listdir(vroot_a):
+        if not sub.is_dir():
+            continue
+        if _is_old_path(sub):
+            continue
+        m = GP_FOLDER_RE.match(sub.name)
+        if not m:
+            continue
+        gp_key = f"GP{int(m.group('gp'))}"
+        auv_id = _normalize_auv_id(f"AUV{int(m.group('auv')):03d}")
+        for f in safe_listdir(sub):
+            if _is_old_path(f):
+                continue
+            if f.is_file() and f.suffix.upper() == ".MP4":
+                videos[auv_id][gp_key].append(f.name)
+                total_n += 1
+                try:
+                    total_bytes += f.stat().st_size
+                except OSError:
+                    pass
+
+    # Layout B: medias/videos/AUV{xxx}/GP{n}/*.MP4 (fallback)
+    for sub in safe_listdir(vroot_a):
+        if not sub.is_dir():
+            continue
+        if _is_old_path(sub):
+            continue
+        am = AUV_PHYS_RE.fullmatch(sub.name)
+        if not am:
+            continue
+        auv_id = _normalize_auv_id(f"AUV{int(am.group(1)):03d}")
+        for gp_sub in safe_listdir(sub):
+            if not gp_sub.is_dir():
+                continue
+            if _is_old_path(gp_sub):
+                continue
+            gm = re.fullmatch(r"GP(\d+)", gp_sub.name, re.I)
+            if not gm:
+                continue
+            gp_key = f"GP{int(gm.group(1))}"
+            for f in safe_listdir(gp_sub):
+                if _is_old_path(f):
+                    continue
+                if f.is_file() and f.suffix.upper() == ".MP4":
+                    videos[auv_id][gp_key].append(f.name)
+                    total_n += 1
+                    try:
+                        total_bytes += f.stat().st_size
+                    except OSError:
+                        pass
+
+    # Sort + dedup
+    out = {}
+    for auv in sorted(videos.keys()):
+        out[auv] = {}
+        for gp in sorted(videos[auv].keys()):
+            out[auv][gp] = sorted(set(videos[auv][gp]))
+    return out, total_n, total_bytes / 1e9
+
+
+def collect_mcap_bags(ssd_path: Path) -> "tuple[dict, int]":
+    """mcap_bags[AUV{nnn}] -> sorted list of bag-dir names."""
+    bags = defaultdict(list)
+    n = 0
+    broot = ssd_path / "logs" / "SUB" / "bag"
+    for sub in safe_listdir(broot):
+        if not sub.is_dir():
+            continue
+        if _is_old_path(sub):
+            continue
+        m = BAG_AUV_RE.search(sub.name)
+        if not m:
+            continue
+        auv_id = _normalize_auv_id(f"AUV{int(m.group('auv')):03d}")
+        bags[auv_id].append(sub.name)
+        n += 1
+    return {k: sorted(set(v)) for k, v in sorted(bags.items())}, n
+
+
+def collect_bin_files(ssd_path: Path) -> "tuple[dict, int]":
+    """bin_files[AUV{nnn}] -> sorted .BIN filenames."""
+    bins = defaultdict(list)
+    n = 0
+    # Layout: logs/SUB/AUV{xxx}/*.BIN
+    sub_root = ssd_path / "logs" / "SUB"
+    for sub in safe_listdir(sub_root):
+        if not sub.is_dir():
+            continue
+        if _is_old_path(sub):
+            continue
+        am = AUV_PHYS_RE.fullmatch(sub.name)
+        if not am:
+            continue
+        auv_id = _normalize_auv_id(f"AUV{int(am.group(1)):03d}")
+        for f in safe_listdir(sub):
+            if _is_old_path(f):
+                continue
+            if f.is_file() and f.suffix.upper() == ".BIN":
+                bins[auv_id].append(f.name)
+                n += 1
+    # Also try logs/SUB/bin/*.BIN (flat layout)
+    flat = ssd_path / "logs" / "SUB" / "bin"
+    for f in safe_listdir(flat):
+        if _is_old_path(f):
+            continue
+        if f.is_file() and f.suffix.upper() == ".BIN":
+            bins.setdefault("AUV000", []).append(f.name)
+            n += 1
+    return {k: sorted(set(v)) for k, v in sorted(bins.items())}, n
+
+
+def collect_usbl_logs(ssd_path: Path) -> "list[str]":
+    """Return list of USBL csv paths relative to ssd_path."""
+    out = []
+    logs_root = ssd_path / "logs"
+    if not logs_root.exists():
+        return out
+    for p in logs_root.rglob("*"):
+        try:
+            if not p.is_file():
+                continue
+        except OSError:
+            continue
+        if _is_old_path(p):
+            continue
+        name_l = p.name.lower()
+        if "usbl" in name_l and name_l.endswith(".csv"):
+            try:
+                out.append(str(p.relative_to(ssd_path)))
+            except ValueError:
+                out.append(str(p))
+    return sorted(set(out))
+
+
+def collect_audio_logs(ssd_path: Path) -> "list[str]":
+    out = []
+    aud = ssd_path / "audios"
+    for p in aud.rglob("*") if aud.exists() else []:
+        try:
+            if not p.is_file():
+                continue
+        except (OSError, ValueError):
+            continue
+        if _is_old_path(p):
+            continue
+        try:
+            out.append(str(p.relative_to(ssd_path)))
+        except ValueError:
+            continue
+    return sorted(out)
+
+
+def collect_sss_files(ssd_path: Path) -> "tuple[dict, int]":
+    """klf + bin under ssd_path/sss/, recursive."""
+    sss_root = ssd_path / "sss"
+    klf = []
+    bin_ = []
+    if sss_root.exists():
+        try:
+            for p in sss_root.rglob("*.klf"):
+                try:
+                    if p.is_file() and not _is_old_path(p):
+                        klf.append(str(p.relative_to(ssd_path)))
+                except (OSError, ValueError):
+                    continue
+            for p in sss_root.rglob("*.bin"):
+                try:
+                    if p.is_file() and not _is_old_path(p):
+                        bin_.append(str(p.relative_to(ssd_path)))
+                except (OSError, ValueError):
+                    continue
+        except OSError:
+            pass
+    result = {"klf": sorted(set(klf)), "bin": sorted(set(bin_))}
+    return result, len(result["klf"]) + len(result["bin"])
+
+
+def collect_mag_files(ssd_path: Path) -> "tuple[dict, int]":
+    """csv under ssd_path/mag/, categorised ar/av/side/other."""
+    mag_root = ssd_path / "mag"
+    out = {"ar": [], "av": [], "side": [], "other": []}
+    if mag_root.exists():
+        try:
+            for p in mag_root.rglob("*.csv"):
+                try:
+                    if not p.is_file():
+                        continue
+                    if _is_old_path(p):
+                        continue
+                    rel = str(p.relative_to(ssd_path))
+                    parts = p.parts
+                    if "ar" in parts:
+                        out["ar"].append(rel)
+                    elif "av" in parts:
+                        out["av"].append(rel)
+                    elif "side" in parts:
+                        out["side"].append(rel)
+                    else:
+                        out["other"].append(rel)
+                except (OSError, ValueError):
+                    continue
+        except OSError:
+            pass
+    result = {k: sorted(set(v)) for k, v in out.items()}
+    total = sum(len(v) for v in result.values())
+    return result, total
+
+
+def build_manifest(mission: str, ssd_base: Path) -> dict:
+    ssd_path = ssd_base / mission / "raw_data"
+    if not ssd_path.exists():
+        raise FileNotFoundError(f"raw_data not found: {ssd_path}")
+
+    videos, n_vids, total_gb = collect_videos(ssd_path)
+    mcap_bags, n_bags = collect_mcap_bags(ssd_path)
+    bin_files, n_bins = collect_bin_files(ssd_path)
+    usbl_logs = collect_usbl_logs(ssd_path)
+    audio_logs = collect_audio_logs(ssd_path)
+    sss_files, n_sss = collect_sss_files(ssd_path)
+    mag_files, n_mag = collect_mag_files(ssd_path)
+
+    # Parse mission date for date-focus filter
+    mission_date = _parse_mission_date(mission)
+    if mission_date:
+        win_start, win_end = _mission_date_window(mission_date)
+        mission_date_str = mission_date.strftime("%Y-%m-%d")
+        mission_date_window_utc = {
+            "start": win_start.isoformat(timespec="seconds"),
+            "end": win_end.isoformat(timespec="seconds"),
+        }
+        print(f"  mission_date: {mission_date_str} | window: {win_start.isoformat(timespec='seconds')} -> {win_end.isoformat(timespec='seconds')}")
+    else:
+        mission_date_str = None
+        mission_date_window_utc = None
+        win_start = win_end = None
+
+    manifest = {
+        "mission": mission,
+        "generated_at": iso_utc_now(),
+        "ssd_path": str(ssd_path),
+        "mission_date": mission_date_str,
+        "mission_date_window_utc": mission_date_window_utc,
+        "videos": videos,
+        "mcap_bags": mcap_bags,
+        "bin_files": bin_files,
+        "usbl_logs": usbl_logs,
+        "audio_logs": audio_logs,
+        "sss_files": sss_files,
+        "mag_files": mag_files,
+        "totals": {
+            "n_videos": n_vids,
+            "n_mcap_bags": n_bags,
+            "n_bin_files": n_bins,
+            "n_usbl_logs": len(usbl_logs),
+            "n_audio_logs": len(audio_logs),
+            "n_sss_files": n_sss,
+            "n_mag_files": n_mag,
+            "total_video_size_gb": round(total_gb, 2),
+        },
+    }
+
+    # --- Coverage extraction ---
+    coverage, auv_file_windows = build_coverage(manifest, ssd_base)
+
+    # --- Date-focus filter on coverage ---
+    n_filtered_old = 0  # already handled at collect time via _is_old_path
+    n_filtered_out_of_date = 0
+    if win_start and win_end:
+        filtered_coverage = {}
+        for rel, cov in coverage.items():
+            if cov.get("source") == "mtime_fallback":
+                filtered_coverage[rel] = cov
+                continue
+            try:
+                t_s = datetime.fromisoformat(cov["t_start"])
+                # Ensure tz-aware
+                if t_s.tzinfo is None:
+                    t_s = t_s.replace(tzinfo=timezone.utc)
+                if win_start <= t_s <= win_end:
+                    filtered_coverage[rel] = cov
+                else:
+                    n_filtered_out_of_date += 1
+            except Exception:
+                filtered_coverage[rel] = cov
+        if n_filtered_out_of_date:
+            print(f"  date_filter: removed {n_filtered_out_of_date} entries outside mission date window")
+        coverage = filtered_coverage
+
+        # Rebuild auv_file_windows from filtered coverage
+        kept_rels = set(coverage.keys())
+        auv_file_windows_filtered = defaultdict(list)
+        for auv_id, wins in auv_file_windows.items():
+            for w in wins:
+                if w[2] in kept_rels:
+                    auv_file_windows_filtered[auv_id].append(w)
+        auv_file_windows = dict(auv_file_windows_filtered)
+
+    mission_window = compute_mission_window(coverage)
+    auv_windows = compute_auv_windows(auv_file_windows)
+
+    # Coverage stats
+    import collections
+    source_counts = collections.Counter(v["source"] for v in coverage.values())
+    n_fallback = source_counts.get("mtime_fallback", 0)
+    n_total_cov = len(coverage)
+    if n_total_cov:
+        print(f"  coverage: {n_total_cov} files | fallback={n_fallback} "
+              f"({100*n_fallback//n_total_cov}%) | sources={dict(source_counts)}")
+
+    manifest["coverage"] = coverage
+    manifest["mission_window"] = mission_window
+    manifest["auv_windows"] = auv_windows
+    manifest["totals"]["n_filtered_old"] = n_filtered_old
+    manifest["totals"]["n_filtered_out_of_date"] = n_filtered_out_of_date
+
+    return manifest
+
+
+def main(argv=None):
+    ap = argparse.ArgumentParser(description="Stage 01 -- select mission, produce raw manifest.")
+    ap.add_argument("--mission", required=True, help="Mission folder name (e.g. 20260505-Lepradet)")
+    ap.add_argument("--ssd-base", default="/mnt/ssd", help="SSD base path (default /mnt/ssd)")
+    ap.add_argument("--out", default="/home/cosma/cosma-qc/data",
+                    help="Output base dir (manifest written to <out>/<mission>/01_manifest.json)")
+    args = ap.parse_args(argv)
+
+    ssd_base = Path(args.ssd_base)
+    out_base = Path(args.out)
+    out_dir = out_base / args.mission
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / "01_manifest.json"
+
+    with track_stage("01_select_mission", args.mission, out_dir,
+                     output_files=[out_path]):
+        manifest = build_manifest(args.mission, ssd_base)
+        tmp = out_path.with_suffix(".json.tmp")
+        tmp.write_text(json.dumps(manifest, indent=2, ensure_ascii=False))
+        tmp.replace(out_path)
+
+        t = manifest["totals"]
+        print(f"[ok] {out_path}")
+        print(f"  videos={t['n_videos']} ({t['total_video_size_gb']} GB) "
+              f"bags={t['n_mcap_bags']} bins={t['n_bin_files']} "
+              f"usbl={t['n_usbl_logs']} audio={t['n_audio_logs']} "
+              f"sss={t['n_sss_files']} mag={t['n_mag_files']}")
+        print(f"  filtered: old={t.get('n_filtered_old', 0)} out_of_date={t.get('n_filtered_out_of_date', 0)}")
+        if manifest.get("mission_window"):
+            mw = manifest["mission_window"]
+            print(f"  mission_window: {mw['t_start']} -> {mw['t_end']} ({mw['duration_s']}s)")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/pipeline/stages/02_mission_run_detect.py
+++ b/pipeline/stages/02_mission_run_detect.py
@@ -0,0 +1,835 @@
+#!/usr/bin/env python3
+"""Stage 02 — Mission run detection (v4 state-based).
+
+Détecte runs via transitions /mavros/state (armed + mode) + validation rel_alt.
+Fallback depth-only si topic state absent.
+
+Usage:
+    python3 02_mission_run_detect.py --mission 20260508-sttropez [--ssd-base /mnt/ssd] [--out data/] [--dry-run]
+    python3 02_mission_run_detect.py --mission 20260505-Lepradet --out data/ \
+        --state-modes ALT_HOLD --require-armed --require-descent \
+        --min-mission-depth -2.0 --min-sustained-duration 30 --min-near-bottom-pct 50 --force
+"""
+
+import argparse
+import glob
+import json
+import re
+import statistics
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+from mcap.reader import make_reader
+from mcap_ros2.decoder import DecoderFactory
+
+# Local helper for stage time/memory tracking
+sys.path.insert(0, str(Path(__file__).parent))
+from _meta import track_stage  # noqa: E402
+
+# Mapping ID physique → ID logistique
+AUV_PHYSICAL_MAP = {
+    "AUV010": "AUV210",
+    "AUV012": "AUV212",
+    "AUV013": "AUV213",
+}
+
+TOPIC_REL_ALT = "/mavros/global_position/rel_alt"
+TOPIC_STATE = "/mavros/state"
+THRESHOLD = -0.3        # rel_alt < THRESHOLD → immersé
+NEED_STREAK = 30        # secondes consécutives pour confirmer début/fin
+MIN_DURATION = 60       # secondes minimum par run
+SMOOTH_WINDOW_S = 3     # fenêtre lissage médian (secondes à 1Hz = samples)
+
+# Default modes considérés comme "mission active"
+DEFAULT_STATE_MODES = {"AUTO", "GUIDED"}
+# Modes qui TERMINENT un run (si require-armed=False)
+STATE_STOP_MODES = {"SURFACE", "MANUAL"}
+
+# Filtrage "vraie mission" (modifiables via argparse)
+DEFAULT_MIN_MISSION_DEPTH = -3.0   # rel_alt doit atteindre ce seuil (m) — strict v5 2026-05-14
+DEFAULT_MIN_SUSTAINED_DURATION = 60.0  # pendant au moins N secondes consécutives — strict v5
+DEFAULT_MIN_NEAR_BOTTOM_PCT = 80.0  # % min du run où rel_alt < min_mission_depth — strict v5
+DEFAULT_MIN_DISPLACEMENT_M = 5.0   # futur stage06+: déplacement min USBL — documenté seulement v5
+
+# Filtrage "avant première plongée"
+DEFAULT_FIRST_SUBMERSION_DEPTH = -2.0   # seuil rel_alt pour considérer submergé
+DEFAULT_FIRST_SUBMERSION_DURATION = 5.0  # durée min continue (s) pour confirmer submersion
+
+
+# ---------------------------------------------------------------------------
+# Lecture MCAP rel_alt
+# ---------------------------------------------------------------------------
+
+def parse_mcap_relalt(mcap_path: Path):
+    """Lit rel_alt depuis un fichier mcap. Retourne liste de (epoch_s, rel_alt_m)."""
+    data = []
+    try:
+        with open(mcap_path, "rb") as fp:
+            reader = make_reader(fp, decoder_factories=[DecoderFactory()])
+            for _schema, _channel, message, ros_msg in reader.iter_decoded_messages(topics=[TOPIC_REL_ALT]):
+                ts = message.log_time / 1e9
+                data.append((ts, float(ros_msg.data)))
+    except Exception as exc:
+        print(f"  [WARN] skip {mcap_path.name}: {exc}", file=sys.stderr)
+    return data
+
+
+# ---------------------------------------------------------------------------
+# Lecture MCAP /mavros/state
+# ---------------------------------------------------------------------------
+
+def read_mavros_state(mcap_path: Path, t_start: float = None, t_end: float = None):
+    """
+    Lit /mavros/state depuis un fichier mcap.
+    Retourne liste de (epoch_s, armed:bool, mode:str).
+    Filtrage optionnel sur [t_start, t_end].
+    """
+    data = []
+    try:
+        with open(mcap_path, "rb") as fp:
+            reader = make_reader(fp, decoder_factories=[DecoderFactory()])
+            for _schema, _channel, message, ros_msg in reader.iter_decoded_messages(topics=[TOPIC_STATE]):
+                ts = message.log_time / 1e9
+                if t_start is not None and ts < t_start:
+                    continue
+                if t_end is not None and ts > t_end:
+                    continue
+                data.append((ts, bool(ros_msg.armed), str(ros_msg.mode)))
+    except Exception as exc:
+        # Silent — bag could be corrupt
+        pass
+    return data
+
+
+def parse_mcap_both(mcap_path: Path):
+    """Lit rel_alt ET state depuis un seul fichier mcap en un seul pass."""
+    alt_data = []
+    state_data = []
+    try:
+        with open(mcap_path, "rb") as fp:
+            reader = make_reader(fp, decoder_factories=[DecoderFactory()])
+            for _schema, _channel, message, ros_msg in reader.iter_decoded_messages(
+                    topics=[TOPIC_REL_ALT, TOPIC_STATE]):
+                ts = message.log_time / 1e9
+                if _channel.topic == TOPIC_REL_ALT:
+                    alt_data.append((ts, float(ros_msg.data)))
+                elif _channel.topic == TOPIC_STATE:
+                    state_data.append((ts, bool(ros_msg.armed), str(ros_msg.mode)))
+    except Exception as exc:
+        print(f"  [WARN] skip {mcap_path.name}: {exc}", file=sys.stderr)
+    return alt_data, state_data
+
+
+def extract_auv_id(folder_name: str):
+    """'20260508_054551_AUV010' → 'AUV010'."""
+    m = re.search(r"(AUV\d+)$", folder_name)
+    return m.group(1) if m else None
+
+
+# ---------------------------------------------------------------------------
+# Signal processing
+# ---------------------------------------------------------------------------
+
+def resample_1hz(data):
+    """Resample (epoch, val) → 1Hz par interpolation linéaire. Retourne (times[], vals[])."""
+    if not data:
+        return [], []
+    data = sorted(data)
+    t0 = int(data[0][0])
+    t1 = int(data[-1][0])
+    times = list(range(t0, t1 + 1))
+    vals = []
+    idx = 0
+    for t in times:
+        while idx < len(data) - 1 and data[idx + 1][0] < t:
+            idx += 1
+        if t <= data[0][0]:
+            v = data[0][1]
+        elif t >= data[-1][0]:
+            v = data[-1][1]
+        else:
+            t0_, v0 = data[idx]
+            t1_, v1 = data[idx + 1]
+            dt = t1_ - t0_
+            v = v0 + ((t - t0_) / dt) * (v1 - v0) if dt > 0 else v0
+        vals.append(v)
+    return times, vals
+
+
+def median_filter(vals, window=3):
+    half = window // 2
+    out = []
+    n = len(vals)
+    for i in range(n):
+        lo = max(0, i - half)
+        hi = min(n, i + half + 1)
+        w = sorted(vals[lo:hi])
+        out.append(w[len(w) // 2])
+    return out
+
+
+def find_first_submersion_epoch(rel_alt_times, rel_alt_vals, threshold=-2.0, min_duration=5.0):
+    """
+    Trouve la PREMIÈRE fois où rel_alt < threshold pendant >= min_duration secondes en continu.
+    Retourne epoch du début de cette submersion, ou None si jamais submergé.
+
+    Travaille sur données 1Hz lissées (times/vals déjà resampleés).
+    """
+    n = len(rel_alt_times)
+    if n == 0:
+        return None
+
+    streak_start = None
+    streak_count = 0
+
+    for i in range(n):
+        if rel_alt_vals[i] < threshold:
+            if streak_count == 0:
+                streak_start = rel_alt_times[i]
+            streak_count += 1
+            if streak_count >= min_duration:
+                return float(streak_start)
+        else:
+            streak_count = 0
+            streak_start = None
+
+    return None
+
+
+def detect_runs_depth_only(times, vals, threshold=THRESHOLD, need_streak=NEED_STREAK, min_duration=MIN_DURATION):
+    """
+    Détecte les runs d'immersion via rel_alt seul (fallback).
+
+    Algorithme:
+    - need_streak samples consécutifs < threshold pour confirmer entrée/sortie
+    - min_duration secondes minimum par run
+    Returns liste de (start_epoch, end_epoch).
+    """
+    n = len(times)
+    if n == 0:
+        return []
+
+    under = [v < threshold for v in vals]
+
+    runs = []
+    in_run = False
+    run_start = None
+    streak_under = 0
+    streak_surface = 0
+
+    i = 0
+    while i < n:
+        if not in_run:
+            if under[i]:
+                streak_under += 1
+                if streak_under >= need_streak:
+                    run_start = times[i - need_streak + 1]
+                    in_run = True
+                    streak_surface = 0
+            else:
+                streak_under = 0
+        else:
+            if under[i]:
+                streak_surface = 0
+            else:
+                streak_surface += 1
+                if streak_surface >= need_streak:
+                    run_end = times[i - need_streak + 1]
+                    dur = run_end - run_start
+                    if dur >= min_duration:
+                        runs.append((run_start, run_end))
+                    in_run = False
+                    run_start = None
+                    streak_under = 0
+                    streak_surface = 0
+        i += 1
+
+    if in_run and run_start is not None:
+        run_end = times[-1]
+        dur = run_end - run_start
+        if dur >= min_duration:
+            runs.append((run_start, run_end))
+
+    return runs
+
+
+# ---------------------------------------------------------------------------
+# State-based run detection
+# ---------------------------------------------------------------------------
+
+def detect_runs_state_based(state_data, times_alt, vals_smooth, mission_modes, require_armed,
+                             require_descent, min_duration, min_depth_m=-0.3):
+    """
+    Détecte runs via /mavros/state:
+    - Intervalles continus armed=True (si require_armed) AND mode in mission_modes
+    - Valide via transition rel_alt surface → fond (si require_descent)
+    - min_duration filtre courtes fenêtres
+
+    Returns:
+        list of (start_epoch, end_epoch, dominant_mode)
+    """
+    if not state_data:
+        return []
+
+    state_data_sorted = sorted(state_data)
+
+    # Construire séquence d'états
+    # Identifier intervalles "mission active"
+    intervals = []
+    cur_start = None
+    cur_mode_counts = {}
+
+    for ts, armed, mode in state_data_sorted:
+        active = (not require_armed or armed) and (mode in mission_modes)
+
+        if active and cur_start is None:
+            cur_start = ts
+            cur_mode_counts = {mode: 1}
+        elif active and cur_start is not None:
+            cur_mode_counts[mode] = cur_mode_counts.get(mode, 0) + 1
+        elif not active and cur_start is not None:
+            # Fin de l'intervalle
+            intervals.append((cur_start, ts, cur_mode_counts))
+            cur_start = None
+            cur_mode_counts = {}
+
+    # Flush dernier intervalle
+    if cur_start is not None:
+        intervals.append((cur_start, state_data_sorted[-1][0], cur_mode_counts))
+
+    # Filtrer par durée min
+    runs = []
+    for start_t, end_t, mode_counts in intervals:
+        dur = end_t - start_t
+        if dur < min_duration:
+            continue
+
+        dominant_mode = max(mode_counts, key=mode_counts.get) if mode_counts else "UNKNOWN"
+
+        if require_descent:
+            # Vérifier transition surface → fond dans la fenêtre
+            window_alts = [(t, v) for t, v in zip(times_alt, vals_smooth)
+                           if start_t <= t <= end_t]
+            if not window_alts:
+                # Pas de données alt dans cette fenêtre — skip
+                continue
+            alt_vals = [v for _, v in window_alts]
+            # Début = surface (rel_alt > -1m dans les 60 premiers samples)
+            first_segment = alt_vals[:60]
+            last_segment = alt_vals[-60:]
+            starts_near_surface = any(v > -1.0 for v in first_segment)
+            reaches_depth = any(v <= min_depth_m for v in alt_vals)
+            if not (starts_near_surface and reaches_depth):
+                continue
+
+        runs.append((start_t, end_t, dominant_mode))
+
+    return runs
+
+
+# ---------------------------------------------------------------------------
+# Validation "vraie mission" — filtre oscillations surface
+# ---------------------------------------------------------------------------
+
+def compute_sustained_depth(times, vals_smooth, start_e, end_e, min_depth_m):
+    """
+    Pour un run [start_e, end_e], calcule le plus long segment continu
+    où rel_alt <= min_depth_m (ex: -2.0m).
+    """
+    run_pairs = [(t, v) for t, v in zip(times, vals_smooth) if start_e <= t <= end_e]
+    if not run_pairs:
+        return 0.0, 0.0
+
+    best_dur = 0
+    best_min = 0.0
+    cur_dur = 0
+    cur_min = 0.0
+
+    for _t, v in run_pairs:
+        if v <= min_depth_m:
+            cur_dur += 1
+            cur_min = min(cur_min, v) if cur_dur > 1 else v
+        else:
+            if cur_dur > best_dur:
+                best_dur = cur_dur
+                best_min = cur_min
+            cur_dur = 0
+            cur_min = 0.0
+
+    if cur_dur > best_dur:
+        best_dur = cur_dur
+        best_min = cur_min
+
+    return round(best_min, 2), float(best_dur)
+
+
+def compute_near_bottom_pct(times, vals_smooth, start_e, end_e, min_depth_m):
+    """Calcule le % de samples dans [start_e, end_e] où rel_alt < min_depth_m."""
+    run_vals = [v for t, v in zip(times, vals_smooth) if start_e <= t <= end_e]
+    if not run_vals:
+        return 0.0
+    n_below = sum(1 for v in run_vals if v < min_depth_m)
+    return round(100.0 * n_below / len(run_vals), 1)
+
+
+# ---------------------------------------------------------------------------
+# Processing AUV
+# ---------------------------------------------------------------------------
+
+def process_auv(auv_mcap_id: str, bag_dirs: list, ssd_base: Path,
+                min_mission_depth: float, min_sustained_duration: float,
+                min_near_bottom_pct: float = 0.0,
+                state_modes: set = None, require_armed: bool = True,
+                require_descent: bool = True,
+                first_submersion_depth: float = DEFAULT_FIRST_SUBMERSION_DEPTH,
+                first_submersion_duration: float = DEFAULT_FIRST_SUBMERSION_DURATION):
+    """
+    Agréger tous les bags d'un AUV, détecter runs via state (avec fallback depth-only).
+    Filtre les runs entiers avant la première vraie submersion.
+    """
+    physical_id = AUV_PHYSICAL_MAP.get(auv_mcap_id, auv_mcap_id)
+    if state_modes is None:
+        state_modes = DEFAULT_STATE_MODES
+
+    all_alt_data = []
+    all_state_data = []
+    bags_list = []
+    n_parsed = 0
+    n_skipped = 0
+
+    # Parcourir bags dans l'ordre chronologique
+    for bag_dir in sorted(bag_dirs):
+        bags_list.append(str(bag_dir.name))
+        mcap_files = sorted(bag_dir.glob("*.mcap"))
+        for mcap_path in mcap_files:
+            alt_pts, state_pts = parse_mcap_both(mcap_path)
+            if alt_pts or state_pts:
+                all_alt_data.extend(alt_pts)
+                all_state_data.extend(state_pts)
+                n_parsed += 1
+            else:
+                n_skipped += 1
+            if (n_parsed + n_skipped) % 50 == 0:
+                print(f"  [{auv_mcap_id}] ... {n_parsed + n_skipped} bags processed", file=sys.stderr)
+
+    has_state = len(all_state_data) > 0
+    print(f"  [{auv_mcap_id}] {n_parsed} mcap OK, {n_skipped} skip, "
+          f"{len(all_alt_data)} rel_alt pts, {len(all_state_data)} state pts", file=sys.stderr)
+
+    if not all_alt_data:
+        print(f"  [{auv_mcap_id}] [WARN] aucune donnée rel_alt", file=sys.stderr)
+        return {
+            "mcap_id": auv_mcap_id,
+            "physical_id": physical_id,
+            "bags": bags_list,
+            "total_immersion_s": 0,
+            "runs": [],
+            "runs_rejected": [],
+            "detection_method": "no_data",
+            "first_submersion_epoch": None,
+            "pre_water_rejected_count": 0,
+        }
+
+    # Dédup + tri rel_alt
+    seen = set()
+    deduped_alt = []
+    for t, v in sorted(all_alt_data):
+        key = round(t, 3)
+        if key not in seen:
+            seen.add(key)
+            deduped_alt.append((t, v))
+
+    # Resample 1Hz
+    times, vals = resample_1hz(deduped_alt)
+    print(f"  [{auv_mcap_id}] {len(times)} samples 1Hz [{times[0]:.0f}..{times[-1]:.0f}]", file=sys.stderr)
+
+    # Lissage médian 3s
+    vals_smooth = median_filter(vals, window=SMOOTH_WINDOW_S)
+
+    # Trouver première submersion réelle
+    first_sub_epoch = find_first_submersion_epoch(
+        times, vals_smooth,
+        threshold=first_submersion_depth,
+        min_duration=first_submersion_duration,
+    )
+    if first_sub_epoch is not None:
+        print(f"  [{auv_mcap_id}] Première submersion réelle: {first_sub_epoch:.0f} "
+              f"({datetime.fromtimestamp(first_sub_epoch, tz=timezone.utc).isoformat()})",
+              file=sys.stderr)
+    else:
+        print(f"  [{auv_mcap_id}] [WARN] Aucune submersion réelle détectée "
+              f"(threshold={first_submersion_depth}m, min_dur={first_submersion_duration}s)",
+              file=sys.stderr)
+
+    # Détection runs
+    detection_method = "depth_only"
+    if has_state:
+        # Trier state data
+        all_state_data.sort(key=lambda x: x[0])
+
+        # Extraire modes présents
+        modes_in_data = set(m for _, _, m in all_state_data)
+        modes_active = modes_in_data & state_modes
+        print(f"  [{auv_mcap_id}] state modes found: {modes_in_data}", file=sys.stderr)
+        print(f"  [{auv_mcap_id}] state modes matching --state-modes: {modes_active}", file=sys.stderr)
+
+        if modes_active:
+            raw_runs_with_mode = detect_runs_state_based(
+                all_state_data, times, vals_smooth,
+                mission_modes=state_modes,
+                require_armed=require_armed,
+                require_descent=require_descent,
+                min_duration=MIN_DURATION,
+                min_depth_m=min_mission_depth,
+            )
+            raw_runs = [(s, e) for s, e, _m in raw_runs_with_mode]
+            run_modes = {(s, e): m for s, e, m in raw_runs_with_mode}
+            detection_method = "state_based"
+            print(f"  [{auv_mcap_id}] {len(raw_runs)} candidats state-based", file=sys.stderr)
+        else:
+            print(f"  [{auv_mcap_id}] [WARN] aucun mode {state_modes} dans state data "
+                  f"→ fallback depth-only", file=sys.stderr)
+            raw_runs = detect_runs_depth_only(times, vals_smooth)
+            run_modes = {}
+            detection_method = "depth_only_fallback_no_modes"
+    else:
+        print(f"  [{auv_mcap_id}] [WARN] topic /mavros/state vide → fallback depth-only", file=sys.stderr)
+        raw_runs = detect_runs_depth_only(times, vals_smooth)
+        run_modes = {}
+
+    print(f"  [{auv_mcap_id}] {len(raw_runs)} runs candidats ({detection_method})", file=sys.stderr)
+
+    # Filtrage pre-water: exclure/tronquer runs avant première submersion
+    pre_water_rejected_count = 0
+    if first_sub_epoch is not None:
+        filtered_runs = []
+        for start_e, end_e in raw_runs:
+            if end_e <= first_sub_epoch:
+                # Run entier avant première plongée → rejeter
+                pre_water_rejected_count += 1
+                phys_id_log = AUV_PHYSICAL_MAP.get(auv_mcap_id, auv_mcap_id)
+                print(f"  [{auv_mcap_id}] PRE-WATER REJECT run [{start_e:.0f}..{end_e:.0f}] "
+                      f"(end before first_sub={first_sub_epoch:.0f})", file=sys.stderr)
+            elif start_e < first_sub_epoch < end_e:
+                # Run chevauche → tronquer start
+                new_start = first_sub_epoch
+                print(f"  [{auv_mcap_id}] PRE-WATER TRUNCATE run [{start_e:.0f}..{end_e:.0f}] "
+                      f"→ [{new_start:.0f}..{end_e:.0f}]", file=sys.stderr)
+                # Mettre à jour run_modes si besoin
+                if (start_e, end_e) in run_modes:
+                    run_modes[(new_start, end_e)] = run_modes.pop((start_e, end_e))
+                filtered_runs.append((new_start, end_e))
+            else:
+                filtered_runs.append((start_e, end_e))
+        raw_runs = filtered_runs
+        print(f"  [{auv_mcap_id}] {pre_water_rejected_count} runs rejetés pre-water, "
+              f"{len(raw_runs)} restants", file=sys.stderr)
+
+    # Construire runs enrichis + filtre "vraie mission"
+    runs_out = []
+    runs_rejected = []
+    for idx, (start_e, end_e) in enumerate(raw_runs):
+        dur = end_e - start_e
+        run_id = f"{physical_id}_run_{idx:02d}"
+        dominant_mode = run_modes.get((start_e, end_e), "UNKNOWN")
+
+        run_vals = [v for t, v in zip(times, vals_smooth) if start_e <= t <= end_e]
+        max_depth = round(min(run_vals), 2) if run_vals else 0.0
+        mean_depth = round(statistics.mean(run_vals), 2) if run_vals else 0.0
+
+        sustained_below_m, sustained_duration_s = compute_sustained_depth(
+            times, vals_smooth, start_e, end_e, min_mission_depth
+        )
+        pct_near_bottom = compute_near_bottom_pct(
+            times, vals_smooth, start_e, end_e, min_mission_depth
+        )
+
+        run_entry = {
+            "run_id": run_id,
+            "start_epoch": float(start_e),
+            "end_epoch": float(end_e),
+            "duration_s": round(dur, 1),
+            "max_depth_m": max_depth,
+            "mean_depth_m": mean_depth,
+            "sustained_below_m": sustained_below_m,
+            "sustained_duration_s": sustained_duration_s,
+            "pct_near_bottom": pct_near_bottom,
+            "dominant_mode": dominant_mode,
+            "detection_method": detection_method,
+        }
+
+        reject_reason = None
+        if sustained_duration_s < min_sustained_duration:
+            reject_reason = (
+                f"sustained_depth only {sustained_duration_s:.0f}s "
+                f"(need {min_sustained_duration:.0f}s below {min_mission_depth}m)"
+            )
+        elif min_near_bottom_pct > 0.0 and pct_near_bottom < min_near_bottom_pct:
+            reject_reason = (
+                f"not_enough_immersion: only {pct_near_bottom:.1f}% "
+                f"time below {min_mission_depth}m (need {min_near_bottom_pct:.1f}%)"
+            )
+
+        if reject_reason is None:
+            runs_out.append(run_entry)
+        else:
+            run_entry["rejected_reason"] = reject_reason
+            runs_rejected.append(run_entry)
+            print(
+                f"  [{auv_mcap_id}] REJECT {run_id}: max_depth={max_depth}m "
+                f"sustained={sustained_duration_s:.0f}s pct={pct_near_bottom:.1f}% "
+                f"reason={reject_reason[:60]}",
+                file=sys.stderr,
+            )
+
+    print(
+        f"  [{auv_mcap_id}] {len(runs_out)} runs OK, {len(runs_rejected)} rejetés ({detection_method})",
+        file=sys.stderr,
+    )
+
+    total_immersion_s = round(sum(r["duration_s"] for r in runs_out), 1)
+
+    return {
+        "mcap_id": auv_mcap_id,
+        "physical_id": physical_id,
+        "bags": bags_list,
+        "total_immersion_s": total_immersion_s,
+        "runs": runs_out,
+        "runs_rejected": runs_rejected,
+        "detection_method": detection_method,
+        "first_submersion_epoch": first_sub_epoch,
+        "pre_water_rejected_count": pre_water_rejected_count,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def _run(args):
+    bags_root = Path(args.ssd_base) / args.mission / "raw_data" / "logs" / "SUB" / "bag"
+    if not bags_root.exists():
+        print(f"[ERROR] bags dir not found: {bags_root}", file=sys.stderr)
+        sys.exit(1)
+
+    state_modes = set(m.strip() for m in args.state_modes.split(",") if m.strip())
+
+    print(f"[stage02] Mission: {args.mission}")
+    print(f"[stage02] Bags root: {bags_root}", file=sys.stderr)
+    print(f"[stage02] State modes: {state_modes}, require_armed={args.require_armed}, "
+          f"require_descent={args.require_descent}", file=sys.stderr)
+    near_bottom_str = (f" + near_bottom >= {args.min_near_bottom_pct:.0f}%"
+                       if args.min_near_bottom_pct > 0 else "")
+    print(
+        f"[stage02] Filtre mission: sustained >= {args.min_sustained_duration}s "
+        f"below {args.min_mission_depth}m{near_bottom_str}",
+        file=sys.stderr,
+    )
+    print(
+        f"[stage02] Filtre pre-water: first_submersion threshold={args.first_submersion_depth}m "
+        f"min_duration={args.first_submersion_duration}s",
+        file=sys.stderr,
+    )
+
+    # Grouper dossiers bag par AUV ID
+    auv_dirs = {}
+    for d in sorted(bags_root.iterdir()):
+        if not d.is_dir():
+            continue
+        auv_id = extract_auv_id(d.name)
+        if auv_id:
+            auv_dirs.setdefault(auv_id, []).append(d)
+
+    print(f"[stage02] AUVs: {sorted(auv_dirs.keys())}")
+
+    # Traiter chaque AUV
+    auvs_out = {}
+    all_runs_flat = []
+    all_rejected_flat = []
+
+    for auv_mcap_id in sorted(auv_dirs.keys()):
+        physical_id = AUV_PHYSICAL_MAP.get(auv_mcap_id, auv_mcap_id)
+        print(f"\n[stage02] Processing {auv_mcap_id} -> {physical_id} ({len(auv_dirs[auv_mcap_id])} bag dirs)...")
+        result = process_auv(
+            auv_mcap_id,
+            auv_dirs[auv_mcap_id],
+            Path(args.ssd_base),
+            min_mission_depth=args.min_mission_depth,
+            min_sustained_duration=args.min_sustained_duration,
+            min_near_bottom_pct=args.min_near_bottom_pct,
+            state_modes=state_modes,
+            require_armed=args.require_armed,
+            require_descent=args.require_descent,
+            first_submersion_depth=args.first_submersion_depth,
+            first_submersion_duration=args.first_submersion_duration,
+        )
+        auvs_out[physical_id] = result
+        all_runs_flat.extend(result["runs"])
+        all_rejected_flat.extend(result.get("runs_rejected", []))
+
+    # Trier tous les runs par start_epoch
+    all_runs_sorted = sorted(all_runs_flat, key=lambda r: r["start_epoch"])
+    all_rejected_sorted = sorted(all_rejected_flat, key=lambda r: r["start_epoch"])
+
+    # Construire output JSON
+    output = {
+        "mission": args.mission,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "filter_params": {
+            "min_mission_depth_m": args.min_mission_depth,
+            "min_sustained_duration_s": args.min_sustained_duration,
+            "min_near_bottom_pct": args.min_near_bottom_pct,
+            "state_modes": sorted(state_modes),
+            "require_armed": args.require_armed,
+            "require_descent": args.require_descent,
+            "first_submersion_depth_m": args.first_submersion_depth,
+            "first_submersion_duration_s": args.first_submersion_duration,
+        },
+        "auvs": auvs_out,
+        "all_runs_sorted": all_runs_sorted,
+        "all_runs_rejected": all_rejected_sorted,
+    }
+
+    # Summary
+    total_runs = len(all_runs_sorted)
+    total_rejected = len(all_rejected_sorted)
+    all_depths = [r["max_depth_m"] for r in all_runs_sorted]
+    global_max_depth = min(all_depths) if all_depths else 0.0
+    total_immersion = sum(r["duration_s"] for r in all_runs_sorted)
+    total_pre_water = sum(v.get("pre_water_rejected_count", 0) for v in auvs_out.values())
+
+    print(f"\n{'='*60}")
+    print(f"[stage02] SUMMARY — {args.mission}")
+    print(f"{'='*60}")
+    for phys_id, auv in auvs_out.items():
+        deep = [r for r in auv["runs"] if r["max_depth_m"] < -10.0]
+        rej = len(auv.get("runs_rejected", []))
+        method = auv.get("detection_method", "?")
+        fsub = auv.get("first_submersion_epoch")
+        fsub_str = (datetime.fromtimestamp(fsub, tz=timezone.utc).isoformat()
+                    if fsub else "None")
+        pre_w = auv.get("pre_water_rejected_count", 0)
+        print(f"  {phys_id}: {auv['total_immersion_s']}s immersion, "
+              f"{len(auv['runs'])} runs OK, {rej} rejetés, runs>10m: {len(deep)} [{method}]")
+        print(f"    first_submersion: {fsub_str} | pre_water_rejected: {pre_w}")
+    print(f"  Total runs OK: {total_runs}")
+    print(f"  Total runs rejetés: {total_rejected}")
+    print(f"  Total pre-water rejetés: {total_pre_water}")
+    print(f"  Global max depth: {global_max_depth:.1f}m")
+    print(f"  Total immersion: {total_immersion:.0f}s")
+
+    if total_runs == 0:
+        print("[WARN] Aucun run validé!")
+    else:
+        print(f"[QC OK] {total_runs} runs validés")
+        for r in all_runs_sorted:
+            print(f"  {r['run_id']}: {r['duration_s']:.0f}s depth={r['max_depth_m']}m "
+                  f"pct={r['pct_near_bottom']:.1f}% mode={r.get('dominant_mode','?')} "
+                  f"method={r.get('detection_method','?')}")
+
+    if args.dry_run:
+        print(f"\n[dry-run] JSON non écrit.")
+        return
+
+    # Écrire JSON
+    out_dir = Path(args.out) / args.mission
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / "02_runs.json"
+
+    with open(out_path, "w") as f:
+        json.dump(output, f, indent=2)
+
+    print(f"[stage02] Written: {out_path}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Stage 02 — Detect mission underwater runs (v4 state-based)")
+    parser.add_argument("--mission", required=True, help="Mission folder, e.g. 20260508-sttropez")
+    parser.add_argument("--ssd-base", default="/mnt/ssd", help="SSD root path (READ-ONLY)")
+    parser.add_argument("--out", default="data/", help="Output base dir")
+    parser.add_argument("--dry-run", action="store_true", help="Print stats sans écrire fichier")
+    parser.add_argument(
+        "--min-mission-depth",
+        type=float,
+        default=DEFAULT_MIN_MISSION_DEPTH,
+        help=f"Profondeur seuil (m, négatif) pour valider un run (default: {DEFAULT_MIN_MISSION_DEPTH})",
+    )
+    parser.add_argument(
+        "--min-sustained-duration",
+        type=float,
+        default=DEFAULT_MIN_SUSTAINED_DURATION,
+        help=f"Durée min (s) consécutive sous min-mission-depth (default: {DEFAULT_MIN_SUSTAINED_DURATION})",
+    )
+    parser.add_argument(
+        "--min-near-bottom-pct",
+        type=float,
+        default=DEFAULT_MIN_NEAR_BOTTOM_PCT,
+        help=f"% min du run où rel_alt < min-mission-depth (0=désactivé, default: {DEFAULT_MIN_NEAR_BOTTOM_PCT})",
+    )
+    parser.add_argument(
+        "--state-modes",
+        type=str,
+        default=",".join(sorted(DEFAULT_STATE_MODES)),
+        help="CSV modes ArduSub considérés mission (ex: AUTO,GUIDED ou ALT_HOLD). "
+             f"Default: {','.join(sorted(DEFAULT_STATE_MODES))}",
+    )
+    parser.add_argument(
+        "--require-armed",
+        action="store_true",
+        default=True,
+        help="Exiger armed=True (défaut: True)",
+    )
+    parser.add_argument(
+        "--no-require-armed",
+        action="store_false",
+        dest="require_armed",
+        help="Ne pas exiger armed=True",
+    )
+    parser.add_argument(
+        "--require-descent",
+        action="store_true",
+        default=True,
+        help="Exiger transition surface→fond dans fenêtre (défaut: True)",
+    )
+    parser.add_argument(
+        "--no-require-descent",
+        action="store_false",
+        dest="require_descent",
+        help="Ne pas exiger transition surface→fond",
+    )
+    parser.add_argument(
+        "--first-submersion-depth",
+        type=float,
+        default=DEFAULT_FIRST_SUBMERSION_DEPTH,
+        help=f"Seuil rel_alt (m) pour détecter première submersion réelle "
+             f"(default: {DEFAULT_FIRST_SUBMERSION_DEPTH})",
+    )
+    parser.add_argument(
+        "--first-submersion-duration",
+        type=float,
+        default=DEFAULT_FIRST_SUBMERSION_DURATION,
+        help=f"Durée min continue (s) sous first-submersion-depth pour confirmer mise à l'eau "
+             f"(default: {DEFAULT_FIRST_SUBMERSION_DURATION})",
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Forcer recalcul même si fichier existe",
+    )
+    args = parser.parse_args()
+
+    if args.dry_run:
+        _run(args)
+        return
+
+    out_dir = Path(args.out) / args.mission
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_path = out_dir / "02_runs.json"
+    with track_stage("02_mission_run_detect", args.mission, out_dir,
+                     output_files=[out_path]):
+        _run(args)
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/02_nav_parse.py
+++ b/pipeline/stages/02_nav_parse.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+"""Stage 02 — Parse navigation from ROS2 MCAP bag files.
+
+Extracts per-AUV trajectories from MCAP bags using mcap_ros2:
+- /mavros/global_position/global  → NavSatFix (lat, lon, alt)
+- /mavros/imu/data               → Imu (qx, qy, qz, qw)
+- /mavros/imu/static_pressure    → FluidPressure (pressure_pa)
+
+Joins on nearest timestamp (tolerance 100ms).
+Saves parquet: ~/cosma-pipeline/data/<mission>/nav/<AUV>_<segment>.parquet
+
+Fallback: if no MCAP GPS data, marks as degraded=True (GPS=0 under water is normal).
+
+Usage:
+    python3 02_nav_parse.py /home/cosma/cosma-pipeline/20260505-Lepradet/manifest.json
+    python3 02_nav_parse.py /path/manifest.json --auv AUV013
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+
+PIPELINE_BASE = Path(os.environ.get("COSMA_PIPELINE_BASE", "/home/cosma/cosma-pipeline"))
+NAV_TOPICS = [
+    "/mavros/global_position/global",
+    "/mavros/imu/data",
+    "/mavros/imu/static_pressure",
+]
+
+
+def parse_mcap_segment(mcap_files: list[Path]) -> dict[str, list]:
+    """Extract raw topic data from a list of MCAP files (one session/segment).
+    Returns dict keyed by topic -> list of (ts_ns, data_dict).
+    """
+    from mcap_ros2.reader import read_ros2_messages
+
+    topic_data: dict[str, list] = {t: [] for t in NAV_TOPICS}
+
+    for mcap_path in mcap_files:
+        if not mcap_path.exists():
+            continue
+        try:
+            for msg in read_ros2_messages(str(mcap_path), topics=NAV_TOPICS):
+                topic = msg.channel.topic
+                m = msg.ros_msg
+                ts_ns = int(msg.log_time.timestamp() * 1e9)
+
+                if topic == "/mavros/global_position/global":
+                    topic_data[topic].append((ts_ns, {
+                        "lat": float(m.latitude),
+                        "lon": float(m.longitude),
+                        "alt": float(m.altitude),
+                    }))
+                elif topic == "/mavros/imu/data":
+                    topic_data[topic].append((ts_ns, {
+                        "qx": float(m.orientation.x),
+                        "qy": float(m.orientation.y),
+                        "qz": float(m.orientation.z),
+                        "qw": float(m.orientation.w),
+                    }))
+                elif topic == "/mavros/imu/static_pressure":
+                    topic_data[topic].append((ts_ns, {
+                        "pressure_pa": float(m.fluid_pressure),
+                    }))
+        except Exception as e:
+            print(f"  [02] Error reading {mcap_path.name}: {e}")
+
+    return topic_data
+
+
+def join_topics(topic_data: dict[str, list], tol_ns: int = 100_000_000) -> list[dict]:
+    """Join NavSatFix + Imu + FluidPressure on nearest timestamp (100ms tol).
+    Base timeline = NavSatFix if available, else Imu.
+    """
+    import pandas as pd
+
+    nav_pts = topic_data.get("/mavros/global_position/global", [])
+    imu_pts = topic_data.get("/mavros/imu/data", [])
+    pres_pts = topic_data.get("/mavros/imu/static_pressure", [])
+
+    if not nav_pts and not imu_pts:
+        return []
+
+    # Build DataFrames
+    if nav_pts:
+        df_nav = pd.DataFrame([{"ts_ns": ts, **d} for ts, d in nav_pts])
+    else:
+        df_nav = pd.DataFrame(columns=["ts_ns", "lat", "lon", "alt"])
+
+    if imu_pts:
+        df_imu = pd.DataFrame([{"ts_ns": ts, **d} for ts, d in imu_pts])
+    else:
+        df_imu = pd.DataFrame(columns=["ts_ns", "qx", "qy", "qz", "qw"])
+
+    if pres_pts:
+        df_pres = pd.DataFrame([{"ts_ns": ts, **d} for ts, d in pres_pts])
+    else:
+        df_pres = pd.DataFrame(columns=["ts_ns", "pressure_pa"])
+
+    # Use nav as base if it has data, else imu
+    base_df = df_nav if len(df_nav) > 0 else df_imu
+    base_df = base_df.sort_values("ts_ns").reset_index(drop=True)
+
+    # Merge-as-of for IMU
+    result = base_df.copy()
+    if len(df_imu) > 0:
+        df_imu_s = df_imu.sort_values("ts_ns").reset_index(drop=True)
+        # Simple nearest-neighbor join
+        imu_ts = df_imu_s["ts_ns"].values
+        for col in ["qx", "qy", "qz", "qw"]:
+            result[col] = np.nan
+        for i, row_ts in enumerate(result["ts_ns"].values):
+            idx = np.argmin(np.abs(imu_ts - row_ts))
+            if abs(imu_ts[idx] - row_ts) <= tol_ns:
+                for col in ["qx", "qy", "qz", "qw"]:
+                    result.at[i, col] = float(df_imu_s.at[idx, col])
+
+    # Merge pressure
+    if len(df_pres) > 0:
+        df_pres_s = df_pres.sort_values("ts_ns").reset_index(drop=True)
+        pres_ts = df_pres_s["ts_ns"].values
+        result["pressure_pa"] = np.nan
+        for i, row_ts in enumerate(result["ts_ns"].values):
+            idx = np.argmin(np.abs(pres_ts - row_ts))
+            if abs(pres_ts[idx] - row_ts) <= tol_ns:
+                result.at[i, "pressure_pa"] = float(df_pres_s.at[idx, "pressure_pa"])
+
+    # Ensure all columns exist
+    for col in ["lat", "lon", "alt", "qx", "qy", "qz", "qw", "pressure_pa"]:
+        if col not in result.columns:
+            result[col] = np.nan
+
+    return result.to_dict("records")
+
+
+def parse_auv(manifest: dict, auv_id: str, out_dir: Path) -> dict:
+    """Parse all MCAP sessions for one AUV. Returns metrics."""
+    from pathlib import Path as P
+
+    metrics = {
+        "auv_id": auv_id,
+        "segments": [],
+        "total_points": 0,
+        "degraded": False,
+        "status": "ok",
+    }
+
+    bag_sessions = manifest.get("bag_sessions_per_auv", {}).get(auv_id, [])
+    if not bag_sessions:
+        auv_map = manifest.get("auv_mapping", {})
+        bag_auv = auv_map.get(auv_id)
+        if bag_auv:
+            bag_sessions = manifest.get("bag_sessions_per_auv", {}).get(bag_auv, [])
+
+    if not bag_sessions:
+        # Build from raw SSD structure
+        ssd_path = P(manifest.get("ssd_path", "/mnt/ssd") + "/" + manifest["mission"].split("-")[0] + "-" + manifest["mission"].split("-")[1] if "-" in manifest["mission"] else manifest.get("ssd_path", "/mnt/ssd"))
+        auv_num = auv_id.replace("AUV", "0")  # AUV013 -> 0013? No: AUV013 -> AUV013
+        bag_root = P(manifest.get("ssd_path", "/mnt/ssd")) / "raw_data/logs/SUB/bag"
+        sessions = sorted(bag_root.glob(f"*_{auv_id}"))
+        bag_sessions = [{"label": s.name, "mcap_files": [str(f) for f in sorted(s.glob("*.mcap"))]} for s in sessions]
+
+    import pandas as pd
+
+    all_points_total = 0
+    for sess in bag_sessions:
+        label = sess.get("session", sess.get("label", "unknown"))
+        mcap_files = [P(f) for f in sess.get("mcap_files", [])]
+        if not mcap_files:
+            continue
+
+        out_parquet = out_dir / f"{auv_id}_{label}.parquet"
+        if out_parquet.exists():
+            df_ex = pd.read_parquet(out_parquet)
+            n = len(df_ex)
+            print(f"  [02] {auv_id}/{label}: cached ({n} pts)")
+            all_points_total += n
+            metrics["segments"].append({"label": label, "points": n, "cached": True})
+            continue
+
+        print(f"  [02] {auv_id}/{label}: parsing {len(mcap_files)} MCAP files...")
+        topic_data = parse_mcap_segment(mcap_files)
+        points = join_topics(topic_data)
+
+        if not points:
+            print(f"  [02] {auv_id}/{label}: no data")
+            metrics["segments"].append({"label": label, "points": 0, "degraded": True})
+            metrics["degraded"] = True
+            continue
+
+        df = pd.DataFrame(points)
+        n = len(df)
+        # Check GPS quality
+        has_gps = df["lat"].notna().any() and (df["lat"] != 0).any()
+        if not has_gps:
+            print(f"  [02] {auv_id}/{label}: {n} pts, GPS=0 (degraded — AUV underwater)")
+            metrics["degraded"] = True
+        else:
+            print(f"  [02] {auv_id}/{label}: {n} pts, GPS OK")
+
+        df.to_parquet(out_parquet, index=False)
+        all_points_total += n
+        metrics["segments"].append({"label": label, "points": n, "degraded": not has_gps})
+
+    metrics["total_points"] = all_points_total
+    if all_points_total == 0:
+        metrics["status"] = "degraded"
+    return metrics
+
+
+def parse_mission(manifest_path: Path, auv_filter: str | None = None) -> list[dict]:
+    manifest = json.loads(manifest_path.read_text())
+    mission_name = manifest["mission"]
+
+    out_dir = PIPELINE_BASE / "data" / mission_name / "nav"
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    auv_ids = list(set(
+        manifest.get("auv_ids_bags", []) +
+        list(manifest.get("auv_mapping", {}).keys())
+    ))
+    if not auv_ids:
+        auv_ids = manifest.get("auv_ids_video", [])
+    if auv_filter:
+        auv_ids = [a for a in auv_ids if a == auv_filter]
+
+    all_metrics = []
+    init_db()
+
+    for auv_id in sorted(auv_ids):
+        print(f"[02] === {auv_id} ===")
+        m = parse_auv(manifest, auv_id, out_dir)
+        all_metrics.append(m)
+
+        with get_conn() as conn:
+            mission_row = conn.execute("SELECT id FROM missions WHERE name=?", (mission_name,)).fetchone()
+            if mission_row:
+                job_id = upsert_job(conn, mission_row["id"], auv_id, "all", "02_nav_parse",
+                                    status="done" if m["status"] == "ok" else m["status"],
+                                    output_path=str(out_dir))
+                record_metric(conn, job_id, "nav_points_total", value=m["total_points"],
+                              pass_fail="pass" if m["total_points"] > 0 else "warn")
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 02 — Parse nav from MCAP bags")
+    ap.add_argument("manifest", type=Path)
+    ap.add_argument("--auv", type=str, default=None)
+    args = ap.parse_args()
+
+    metrics = parse_mission(args.manifest, auv_filter=args.auv)
+
+    print("\n=== Stage 02 summary ===")
+    for m in metrics:
+        segs = m.get("segments", [])
+        total = m.get("total_points", 0)
+        deg = "DEGRADED" if m.get("degraded") else "OK"
+        print(f"  {m['auv_id']}: {total} pts across {len(segs)} segments [{deg}]")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/02_usbl_parse.py
+++ b/pipeline/stages/02_usbl_parse.py
@@ -0,0 +1,272 @@
+#!/usr/bin/env python3
+"""Stage 02 — Parse USBL/navigation from MCAP bag files.
+
+The USBL CSV logs in logs/SUB/log/ contain raw serial bytes, NOT lat/lon.
+Real navigation data is in MCAP bags (logs/SUB/bag/).
+
+This stage:
+1. Reads MCAP files per AUV session
+2. Extracts position topics (configurable in default_params.yaml)
+3. Falls back to parsing serial bytes if no nav topic found (best-effort)
+4. Outputs Parquet per AUV with columns: timestamp, lat, lon, depth, heading
+
+Usage:
+    python3 02_usbl_parse.py /home/cosma/cosma-pipeline/20260505-Lepradet/manifest.json
+    python3 02_usbl_parse.py /home/cosma/cosma-pipeline/20260505-Lepradet/manifest.json --auv AUV010
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import struct
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import numpy as np
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+
+LOG_DIR = Path(os.environ.get("COSMA_PIPELINE_LOGS", "/home/cosma/cosma-pipeline/logs"))
+LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+# Known nav topics to try in MCAP files
+NAV_TOPICS = [
+    "/usbl/position",
+    "/usbl/fix",
+    "/navigation/position",
+    "/bluerov/usbl",
+    "/waterlinked/position",
+    "/fix",
+    "/gps/fix",
+    "/mavros/global_position/global",
+    "/mavros/global_position/local",
+]
+
+
+def try_parse_mcap(mcap_path: Path, topics: list[str] | None = None) -> list[dict]:
+    """Try to extract nav points from MCAP file. Returns list of {ts, lat, lon, depth}."""
+    try:
+        from mcap.reader import make_reader
+    except ImportError:
+        print(f"  [02] mcap not installed, skipping {mcap_path.name}")
+        return []
+
+    points = []
+    try:
+        with open(mcap_path, "rb") as f:
+            reader = make_reader(f)
+            for schema, channel, message in reader.iter_messages(topics=topics):
+                # Try to deserialize — support ROS2 JSON-encoded or raw
+                try:
+                    import json as _json
+                    data = _json.loads(message.data)
+                    lat = data.get("latitude") or data.get("lat")
+                    lon = data.get("longitude") or data.get("lon")
+                    depth = data.get("depth") or data.get("altitude")
+                    heading = data.get("heading") or data.get("yaw")
+                    if lat is not None and lon is not None:
+                        ts_ns = message.log_time
+                        ts = ts_ns / 1e9
+                        points.append({
+                            "timestamp": ts,
+                            "lat": float(lat),
+                            "lon": float(lon),
+                            "depth": float(depth) if depth is not None else None,
+                            "heading": float(heading) if heading is not None else None,
+                            "source": channel.topic,
+                        })
+                except Exception:
+                    pass
+    except Exception as e:
+        print(f"  [02] MCAP read error {mcap_path.name}: {e}")
+
+    return points
+
+
+def try_parse_serial_csv(csv_path: Path) -> list[dict]:
+    """Best-effort: parse raw serial byte log for USBL range/bearing frames.
+    Waterlinked USBL M64 protocol: 0xBB 0x55 frame header.
+    This is a rough attempt — actual lat/lon requires ship GPS + range+bearing.
+    Returns relative-only positions (range, bearing) if decoded.
+    """
+    points = []
+    try:
+        with open(csv_path, "r") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                parts = line.split(",", 2)
+                if len(parts) < 3:
+                    continue
+                ts_str, direction, raw = parts[0], parts[1], parts[2]
+                if direction.strip() != "RECEIVED":
+                    continue
+                # Extract bytes from repr string b'\xbb...'
+                try:
+                    raw_clean = raw.strip().strip('"')
+                    # Parse Python bytes repr
+                    data = eval(raw_clean)  # safe: only used on known CSV
+                    if len(data) >= 4 and data[0] == 0xBB and data[1] == 0x55:
+                        # Waterlinked M64 position frame: len byte at [2], payload follows
+                        payload_len = data[2]
+                        if len(data) >= payload_len + 3:
+                            # Best effort: look for float32 values in payload
+                            # Actual protocol decoding would need WL M64 spec
+                            ts = datetime.fromisoformat(ts_str)
+                            points.append({
+                                "timestamp": ts.timestamp(),
+                                "lat": None,
+                                "lon": None,
+                                "depth": None,
+                                "heading": None,
+                                "source": "serial_raw",
+                                "raw_bytes": data.hex()[:32],
+                            })
+                except Exception:
+                    pass
+    except Exception as e:
+        print(f"  [02] serial CSV parse error {csv_path.name}: {e}")
+
+    return points
+
+
+def parse_auv_sessions(manifest: dict, auv_id: str, out_dir: Path,
+                       topics: list[str] | None = None) -> dict:
+    """Parse all sessions for one AUV. Returns metrics dict."""
+    metrics = {"auv_id": auv_id, "points_raw": 0, "sources": [], "status": "ok"}
+    all_points: list[dict] = []
+
+    # Try MCAP bags first
+    bag_sessions = manifest.get("bag_sessions_per_auv", {}).get(auv_id, [])
+    if not bag_sessions:
+        # Try mapping: maybe bags use AUV0xx while videos use AUV2xx
+        auv_map = manifest.get("auv_mapping", {})
+        bag_auv = auv_map.get(auv_id)
+        if bag_auv:
+            bag_sessions = manifest.get("bag_sessions_per_auv", {}).get(bag_auv, [])
+
+    for sess in bag_sessions:
+        for mcap_path_str in sess.get("mcap_files", []):
+            mcap_path = Path(mcap_path_str)
+            if not mcap_path.exists():
+                continue
+            pts = try_parse_mcap(mcap_path, topics=topics or NAV_TOPICS)
+            if pts:
+                all_points.extend(pts)
+                if "mcap" not in metrics["sources"]:
+                    metrics["sources"].append("mcap")
+                print(f"  [02] {auv_id} {mcap_path.name}: {len(pts)} nav points")
+
+    # Fallback: serial CSV if no MCAP nav
+    if not all_points:
+        csv_entries = manifest.get("usbl_csv_per_auv", {}).get(auv_id, [])
+        for entry in csv_entries:
+            csv_path = Path(entry["path"])
+            if not csv_path.exists():
+                continue
+            pts = try_parse_serial_csv(csv_path)
+            if pts:
+                all_points.extend(pts)
+                if "serial_csv" not in metrics["sources"]:
+                    metrics["sources"].append("serial_csv")
+
+    metrics["points_raw"] = len(all_points)
+
+    if not all_points:
+        metrics["status"] = "degraded"
+        metrics["note"] = "no nav points found in MCAP or serial CSV"
+        print(f"  [02] {auv_id}: NO nav data found — degraded")
+    else:
+        print(f"  [02] {auv_id}: {len(all_points)} raw nav points from {metrics['sources']}")
+
+    # Save output
+    out_file = out_dir / f"{auv_id}_nav_raw.json"
+    out_file.write_text(json.dumps({
+        "auv_id": auv_id,
+        "generated_at": now_iso(),
+        "metrics": metrics,
+        "points": all_points,
+    }, indent=2, default=str))
+
+    # Also save as simple CSV for downstream
+    csv_out = out_dir / f"{auv_id}_nav_raw.csv"
+    with open(csv_out, "w") as f:
+        f.write("timestamp,lat,lon,depth,heading,source\n")
+        for p in all_points:
+            f.write(f"{p['timestamp']},{p.get('lat','')},{p.get('lon','')},{p.get('depth','')},{p.get('heading','')},{p.get('source','')}\n")
+
+    return metrics
+
+
+def parse_mission(manifest_path: Path, auv_filter: str | None = None,
+                  out_dir: Path | None = None) -> list[dict]:
+    manifest = json.loads(manifest_path.read_text())
+    mission_name = manifest["mission"]
+
+    if out_dir is None:
+        out_dir = manifest_path.parent / "02_usbl_raw"
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # Idempotency: check if all AUV outputs exist
+    auv_ids = manifest.get("auv_ids_bags", []) or manifest.get("auv_ids_video", [])
+    if auv_filter:
+        auv_ids = [a for a in auv_ids if a == auv_filter]
+
+    all_metrics = []
+    init_db()
+
+    for auv_id in auv_ids:
+        out_file = out_dir / f"{auv_id}_nav_raw.json"
+        if out_file.exists():
+            print(f"[02] {auv_id}: output exists, skipping")
+            existing = json.loads(out_file.read_text())
+            all_metrics.append(existing.get("metrics", {"auv_id": auv_id, "status": "cached"}))
+            continue
+
+        print(f"[02] Parsing {auv_id} ...")
+        m = parse_auv_sessions(manifest, auv_id, out_dir)
+        all_metrics.append(m)
+
+        # Record in DB
+        with get_conn() as conn:
+            from orchestrator.db import upsert_mission
+            mission_id_row = conn.execute(
+                "SELECT id FROM missions WHERE name=?", (mission_name,)
+            ).fetchone()
+            if mission_id_row:
+                mission_id = mission_id_row["id"]
+                job_id = upsert_job(conn, mission_id, auv_id, "all", "02_usbl_parse",
+                                    status="done" if m["status"] == "ok" else m["status"],
+                                    output_path=str(out_dir))
+                record_metric(conn, job_id, "usbl_points_raw",
+                              value=m["points_raw"],
+                              pass_fail="pass" if m["points_raw"] > 0 else "fail")
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 02 — Parse USBL/nav from MCAP bags")
+    ap.add_argument("manifest", type=Path, help="manifest.json from stage 01")
+    ap.add_argument("--auv", type=str, default=None, help="Filter to single AUV ID")
+    ap.add_argument("--out", type=Path, default=None, help="Output directory")
+    args = ap.parse_args()
+
+    metrics = parse_mission(args.manifest, auv_filter=args.auv, out_dir=args.out)
+
+    print("\n=== Stage 02 summary ===")
+    total_pts = sum(m.get("points_raw", 0) for m in metrics)
+    for m in metrics:
+        status = m.get("status", "?")
+        pts = m.get("points_raw", 0)
+        src = m.get("sources", [])
+        print(f"  {m['auv_id']}: {pts} pts  {src}  [{status}]")
+    print(f"Total nav points: {total_pts}")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/02b_runs_diag.py
+++ b/pipeline/stages/02b_runs_diag.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python3
+"""Stage 02b — Diagnostic plots per run candidate.
+
+Reads 02_runs.json + replays MCAP rel_alt + /mavros/state to produce
+4-panel PNG per run (validated + rejected).
+
+Output:
+    data/<MISSION>/02_runs_diag/<RUN_ID>.png
+    data/<MISSION>/02_runs_diag/index.json
+    data/<MISSION>/02_runs_diag/index.html
+
+Usage:
+    python3 02b_runs_diag.py --mission 20260505-Lepradet \
+        [--ssd-base /mnt/ssd] [--out data/] [--padding-s 120]
+"""
+
+import argparse
+import json
+import re
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from matplotlib.patches import Rectangle
+
+from mcap.reader import make_reader
+from mcap_ros2.decoder import DecoderFactory
+
+TOPIC_REL_ALT = "/mavros/global_position/rel_alt"
+TOPIC_STATE = "/mavros/state"
+TOPIC_HEADING = "/mavros/global_position/compass_hdg"
+
+AUV_PHYSICAL_MAP = {"AUV010": "AUV210", "AUV012": "AUV212", "AUV013": "AUV213"}
+PHYS_TO_MCAP = {v: k for k, v in AUV_PHYSICAL_MAP.items()}
+
+
+def extract_auv_id(folder_name):
+    m = re.search(r"(AUV\d+)$", folder_name)
+    return m.group(1) if m else None
+
+
+def gather_signals_for_auv(bags_root, auv_mcap_id, t_start, t_end):
+    """Read rel_alt + state + heading from all bags for this AUV in [t_start,t_end]."""
+    alt = []  # (t, v)
+    state = []  # (t, armed, mode)
+    hdg = []  # (t, v_deg)
+    for d in sorted(bags_root.iterdir()):
+        if not d.is_dir():
+            continue
+        if extract_auv_id(d.name) != auv_mcap_id:
+            continue
+        for mcap_path in sorted(d.glob("*.mcap")):
+            try:
+                with open(mcap_path, "rb") as fp:
+                    reader = make_reader(fp, decoder_factories=[DecoderFactory()])
+                    for _schema, channel, message, ros_msg in reader.iter_decoded_messages(
+                            topics=[TOPIC_REL_ALT, TOPIC_STATE, TOPIC_HEADING]):
+                        ts = message.log_time / 1e9
+                        if ts < t_start or ts > t_end:
+                            continue
+                        if channel.topic == TOPIC_REL_ALT:
+                            alt.append((ts, float(ros_msg.data)))
+                        elif channel.topic == TOPIC_STATE:
+                            state.append((ts, bool(ros_msg.armed), str(ros_msg.mode)))
+                        elif channel.topic == TOPIC_HEADING:
+                            hdg.append((ts, float(ros_msg.data)))
+            except Exception as exc:
+                print(f"  [WARN] skip {mcap_path.name}: {exc}", file=sys.stderr)
+    alt.sort()
+    state.sort()
+    hdg.sort()
+    return alt, state, hdg
+
+
+def plot_run_diag(run, alt, state, hdg, filter_params, status, reject_reason, out_path):
+    """4-panel diagnostic plot."""
+    start_e = run["start_epoch"]
+    end_e = run["end_epoch"]
+    dur = end_e - start_e
+    run_id = run["run_id"]
+
+    # Convert epoch → relative seconds from start_e for x-axis readability
+    def to_rel(t):
+        return t - start_e
+
+    fig, axes = plt.subplots(4, 1, figsize=(12, 11), gridspec_kw={"height_ratios": [3, 1, 2, 2]})
+    fig.suptitle(
+        f"{run_id} — {status}  |  start={datetime.fromtimestamp(start_e, tz=timezone.utc).strftime('%H:%M:%S UTC')}  duration={dur:.0f}s",
+        fontsize=12, fontweight="bold",
+        color=("#16a34a" if status == "OK" else "#dc2626"),
+    )
+
+    # === Panel A: rel_alt time series ===
+    ax = axes[0]
+    if alt:
+        t_a = [to_rel(t) for t, _ in alt]
+        v_a = [v for _, v in alt]
+        ax.plot(t_a, v_a, color="#1e40af", lw=1.0, label="rel_alt")
+    threshold = filter_params["min_mission_depth_m"]
+    ax.axhline(threshold, color="#dc2626", ls="--", lw=1.0,
+               label=f"threshold {threshold}m")
+    ax.axhline(0, color="#94a3b8", ls=":", lw=0.8, label="surface")
+    # Mark run window
+    ax.axvspan(0, dur, alpha=0.10, color="#16a34a" if status == "OK" else "#dc2626")
+    ax.set_ylabel("rel_alt (m)")
+    ax.set_title(
+        f"(a) Depth — max={run['max_depth_m']}m  mean={run['mean_depth_m']}m  "
+        f"sustained={run['sustained_duration_s']:.0f}s below {threshold}m  "
+        f"pct_near_bottom={run['pct_near_bottom']:.1f}%"
+    )
+    ax.legend(loc="lower right", fontsize=8)
+    ax.grid(True, alpha=0.3)
+    ax.set_xlim(-30, dur + 30)
+
+    # === Panel B: state armed + mode ===
+    ax = axes[1]
+    if state:
+        t_s = [to_rel(t) for t, _, _ in state]
+        armed_y = [1 if a else 0 for _, a, _ in state]
+        ax.step(t_s, armed_y, where="post", color="#16a34a", lw=1.2, label="armed")
+        # Annotate dominant mode + mode transitions
+        prev_mode = None
+        for t, _, m in state:
+            if m != prev_mode:
+                ax.axvline(to_rel(t), color="#7c3aed", lw=0.5, alpha=0.4)
+                ax.text(to_rel(t), 0.5, m, rotation=90, fontsize=7,
+                        color="#7c3aed", va="center", alpha=0.7)
+                prev_mode = m
+    ax.set_ylim(-0.2, 1.3)
+    ax.set_yticks([0, 1])
+    ax.set_yticklabels(["disarmed", "armed"])
+    ax.set_title(f"(b) MAVROS state — dominant mode: {run.get('dominant_mode','?')}")
+    ax.grid(True, alpha=0.3)
+    ax.set_xlim(-30, dur + 30)
+
+    # === Panel C: depth distribution + cumulative time below threshold ===
+    ax = axes[2]
+    # filter alt to run window
+    run_vals = [v for t, v in alt if start_e <= t <= end_e]
+    if run_vals:
+        ax.hist(run_vals, bins=40, color="#3b82f6", alpha=0.7, edgecolor="white")
+        ax.axvline(threshold, color="#dc2626", ls="--", lw=1.0,
+                   label=f"threshold {threshold}m")
+        n_total = len(run_vals)
+        n_below = sum(1 for v in run_vals if v < threshold)
+        ax.set_title(
+            f"(c) Depth distribution within run  |  {n_below}/{n_total} samples below threshold "
+            f"= {100*n_below/n_total:.1f}%"
+        )
+        ax.legend(loc="upper right", fontsize=8)
+    ax.set_xlabel("rel_alt (m)")
+    ax.set_ylabel("samples")
+    ax.grid(True, alpha=0.3)
+
+    # === Panel D: verdict box ===
+    ax = axes[3]
+    ax.axis("off")
+    color = "#16a34a" if status == "OK" else "#dc2626"
+    verdict_text = f"VERDICT: {status}"
+    ax.text(0.02, 0.85, verdict_text, fontsize=18, fontweight="bold",
+            color=color, transform=ax.transAxes)
+
+    # Build criteria summary
+    crit_lines = []
+    min_dur = filter_params["min_sustained_duration_s"]
+    min_pct = filter_params["min_near_bottom_pct"]
+    min_depth = filter_params["min_mission_depth_m"]
+
+    pass_dur = run["sustained_duration_s"] >= min_dur
+    pass_pct = run["pct_near_bottom"] >= min_pct
+    pass_depth = run["max_depth_m"] < min_depth
+
+    crit_lines.append(
+        f"{'OK' if pass_depth else 'KO':3s}  max_depth {run['max_depth_m']}m < {min_depth}m"
+    )
+    crit_lines.append(
+        f"{'OK' if pass_dur else 'KO':3s}  sustained_duration {run['sustained_duration_s']:.0f}s >= {min_dur:.0f}s"
+    )
+    crit_lines.append(
+        f"{'OK' if pass_pct else 'KO':3s}  pct_near_bottom {run['pct_near_bottom']:.1f}% >= {min_pct:.0f}%"
+    )
+    crit_lines.append(
+        f"     duration {run['duration_s']:.0f}s | mode {run.get('dominant_mode','?')} | method {run.get('detection_method','?')}"
+    )
+
+    for i, line in enumerate(crit_lines):
+        ax.text(0.02, 0.65 - i * 0.13, line, fontsize=10, family="monospace",
+                transform=ax.transAxes)
+
+    if reject_reason:
+        ax.text(0.02, 0.05, f"REASON: {reject_reason}", fontsize=9,
+                color="#dc2626", transform=ax.transAxes, family="monospace")
+
+    axes[-2].set_xlabel("time since run start (s)")
+    plt.tight_layout()
+    plt.savefig(out_path, dpi=90, bbox_inches="tight")
+    plt.close(fig)
+
+
+HTML_TEMPLATE = """<!doctype html>
+<html lang="fr">
+<head>
+<meta charset="utf-8">
+<title>02 Runs Diagnostic — {mission}</title>
+<style>
+body {{ font-family: -apple-system, system-ui, sans-serif; background: #0f172a; color: #e2e8f0; margin: 0; padding: 20px; }}
+h1 {{ color: #38bdf8; margin: 0 0 4px 0; }}
+.subtitle {{ color: #94a3b8; margin-bottom: 20px; font-size: 14px; }}
+.params {{ background: #1e293b; padding: 10px 14px; border-radius: 6px; margin-bottom: 24px; font-family: monospace; font-size: 12px; color: #cbd5e1; }}
+.grid {{ display: grid; grid-template-columns: repeat(auto-fit, minmax(560px, 1fr)); gap: 20px; }}
+.card {{ background: #1e293b; border-radius: 8px; padding: 12px; border-left: 6px solid #475569; }}
+.card.ok {{ border-left-color: #16a34a; }}
+.card.ko {{ border-left-color: #dc2626; }}
+.card h3 {{ margin: 0 0 6px 0; font-size: 16px; }}
+.card h3.ok {{ color: #4ade80; }}
+.card h3.ko {{ color: #f87171; }}
+.card img {{ width: 100%; height: auto; border-radius: 4px; }}
+.meta {{ color: #94a3b8; font-size: 12px; margin: 4px 0 8px 0; font-family: monospace; }}
+.reason {{ color: #fca5a5; font-size: 12px; margin-top: 6px; font-family: monospace; }}
+.summary {{ display: flex; gap: 16px; margin-bottom: 20px; }}
+.pill {{ background: #1e293b; padding: 8px 14px; border-radius: 999px; font-size: 13px; }}
+.pill.ok {{ color: #4ade80; }}
+.pill.ko {{ color: #f87171; }}
+</style>
+</head>
+<body>
+<h1>02 Runs Diagnostic — {mission}</h1>
+<div class="subtitle">Generated {generated_at} · filter v5 strict</div>
+<div class="summary">
+  <div class="pill ok">OK: {n_ok}</div>
+  <div class="pill ko">Rejected: {n_ko}</div>
+  <div class="pill">Total candidates: {n_total}</div>
+</div>
+<div class="params">{params}</div>
+<div class="grid">
+{cards}
+</div>
+</body>
+</html>
+"""
+
+CARD_TEMPLATE = """<div class="card {cls}">
+  <h3 class="{cls}">{run_id} — {status}</h3>
+  <div class="meta">duration={duration}s | max_depth={max_depth}m | sustained={sustained}s | pct_near={pct}% | mode={mode}</div>
+  <img src="{png}" alt="{run_id}">
+  {reason_block}
+</div>"""
+
+
+def build_html(mission, filter_params, generated_at, ok_runs, ko_runs, out_dir):
+    cards = []
+    for run, status, reason in [(r, "OK", None) for r in ok_runs] + [(r, "REJECTED", r.get("rejected_reason")) for r in ko_runs]:
+        cls = "ok" if status == "OK" else "ko"
+        reason_block = f'<div class="reason">REASON: {reason}</div>' if reason else ""
+        cards.append(CARD_TEMPLATE.format(
+            cls=cls, run_id=run["run_id"], status=status,
+            duration=f"{run['duration_s']:.0f}",
+            max_depth=run["max_depth_m"],
+            sustained=f"{run['sustained_duration_s']:.0f}",
+            pct=f"{run['pct_near_bottom']:.1f}",
+            mode=run.get("dominant_mode", "?"),
+            png=f"{run['run_id']}.png",
+            reason_block=reason_block,
+        ))
+    html = HTML_TEMPLATE.format(
+        mission=mission,
+        generated_at=generated_at,
+        params=json.dumps(filter_params, indent=2),
+        n_ok=len(ok_runs),
+        n_ko=len(ko_runs),
+        n_total=len(ok_runs) + len(ko_runs),
+        cards="\n".join(cards),
+    )
+    (out_dir / "index.html").write_text(html, encoding="utf-8")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Stage 02b — runs diagnostic plots")
+    parser.add_argument("--mission", required=True)
+    parser.add_argument("--ssd-base", default="/mnt/ssd")
+    parser.add_argument("--out", default="data/")
+    parser.add_argument("--padding-s", type=float, default=120.0,
+                        help="padding (s) avant/après run pour contexte")
+    args = parser.parse_args()
+
+    mission_dir = Path(args.out) / args.mission
+    runs_json = mission_dir / "02_runs.json"
+    if not runs_json.exists():
+        print(f"[ERROR] {runs_json} missing — run 02_mission_run_detect first", file=sys.stderr)
+        sys.exit(1)
+
+    bags_root = Path(args.ssd_base) / args.mission / "raw_data" / "logs" / "SUB" / "bag"
+    if not bags_root.exists():
+        print(f"[ERROR] bags dir not found: {bags_root}", file=sys.stderr)
+        sys.exit(1)
+
+    data = json.loads(runs_json.read_text())
+    filter_params = data["filter_params"]
+    out_dir = mission_dir / "02_runs_diag"
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    ok_runs = data["all_runs_sorted"]
+    ko_runs = data["all_runs_rejected"]
+    print(f"[stage02b] {len(ok_runs)} OK + {len(ko_runs)} rejected = {len(ok_runs)+len(ko_runs)} runs to plot")
+
+    # Group by AUV → 1 read per AUV bag set covering all runs
+    runs_by_auv = {}
+    for run in ok_runs + ko_runs:
+        # run_id "AUV210_run_00" → AUV210
+        phys = run["run_id"].split("_")[0]
+        runs_by_auv.setdefault(phys, []).append(run)
+
+    pad = args.padding_s
+    for phys_id, runs in runs_by_auv.items():
+        mcap_id = PHYS_TO_MCAP.get(phys_id, phys_id)
+        t_start = min(r["start_epoch"] for r in runs) - pad
+        t_end = max(r["end_epoch"] for r in runs) + pad
+        print(f"  [{phys_id}] reading {mcap_id} bags [{t_start:.0f}..{t_end:.0f}] for {len(runs)} runs")
+        alt, state, hdg = gather_signals_for_auv(bags_root, mcap_id, t_start, t_end)
+        print(f"    {len(alt)} alt pts, {len(state)} state pts, {len(hdg)} hdg pts")
+
+        for run in runs:
+            r_start = run["start_epoch"] - pad
+            r_end = run["end_epoch"] + pad
+            alt_r = [(t, v) for t, v in alt if r_start <= t <= r_end]
+            state_r = [(t, a, m) for t, a, m in state if r_start <= t <= r_end]
+            hdg_r = [(t, v) for t, v in hdg if r_start <= t <= r_end]
+            status = "REJECTED" if run in ko_runs else "OK"
+            reason = run.get("rejected_reason") if status == "REJECTED" else None
+            out_png = out_dir / f"{run['run_id']}.png"
+            plot_run_diag(run, alt_r, state_r, hdg_r, filter_params, status, reason, out_png)
+            print(f"    [{status}] {run['run_id']}.png written")
+
+    # Build index.html + index.json
+    generated_at = datetime.now(timezone.utc).isoformat()
+    index = {
+        "mission": args.mission,
+        "generated_at": generated_at,
+        "filter_params": filter_params,
+        "ok_runs": [r["run_id"] for r in ok_runs],
+        "rejected_runs": [r["run_id"] for r in ko_runs],
+    }
+    (out_dir / "index.json").write_text(json.dumps(index, indent=2))
+    build_html(args.mission, filter_params, generated_at, ok_runs, ko_runs, out_dir)
+    print(f"[stage02b] Done: {out_dir}/index.html")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/03_nav_filter.py
+++ b/pipeline/stages/03_nav_filter.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+"""Stage 03 — Filter and smooth navigation trajectories.
+
+Input:  ~/cosma-pipeline/data/<mission>/nav/<AUV>_<segment>.parquet
+Output: ~/cosma-pipeline/data/<mission>/nav_filtered/<AUV>_<segment>.parquet
+
+Steps:
+1. Drop rows with null lat/lon OR lat==0 AND lon==0 (no GPS lock)
+2. MAD-3σ outlier removal on lat, lon
+3. Moving average smoothing (window 5s, KISS)
+4. Depth from pressure: depth_m = (pressure_pa - 101325) / (1025 * 9.81)
+5. Output: same columns + lat_smooth, lon_smooth, depth_m
+
+Usage:
+    python3 03_nav_filter.py /home/cosma/cosma-pipeline/data/20260505-Lepradet/nav/
+    python3 03_nav_filter.py /path/nav/ --auv AUV013 --sigma 2.5
+"""
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+
+PIPELINE_BASE = Path(os.environ.get("COSMA_PIPELINE_BASE", "/home/cosma/cosma-pipeline"))
+RHO_SEA = 1025.0  # kg/m3
+G = 9.81          # m/s2
+P_ATM = 101325.0  # Pa
+
+
+def mad_mask(arr: np.ndarray, sigma: float = 3.0) -> np.ndarray:
+    """True = keep."""
+    if len(arr) < 4:
+        return np.ones(len(arr), dtype=bool)
+    med = np.median(arr)
+    mad = np.median(np.abs(arr - med))
+    if mad == 0:
+        return np.ones(len(arr), dtype=bool)
+    return np.abs(0.6745 * (arr - med) / mad) < sigma
+
+
+def moving_average(arr: np.ndarray, window: int = 5) -> np.ndarray:
+    if len(arr) < window:
+        return arr.copy()
+    pad = window // 2
+    padded = np.pad(arr, (pad, pad), mode="edge")
+    return np.convolve(padded, np.ones(window) / window, mode="valid")[:len(arr)]
+
+
+def filter_parquet(src: Path, dst_dir: Path, sigma: float = 3.0, window: int = 5) -> dict:
+    import pandas as pd
+
+    df = pd.read_parquet(src)
+    auv_seg = src.stem
+    metrics = {
+        "file": src.name,
+        "points_in": len(df),
+        "points_out": 0,
+        "status": "ok",
+    }
+
+    # Step 1: drop null/zero GPS
+    has_lat = "lat" in df.columns and df["lat"].notna().any()
+    if has_lat:
+        mask_valid = df["lat"].notna() & df["lon"].notna() & (df["lat"] != 0) & (df["lon"] != 0)
+        df_valid = df[mask_valid].copy()
+    else:
+        # No GPS — keep all rows for IMU/pressure
+        df_valid = df.copy()
+        metrics["degraded"] = True
+
+    if len(df_valid) == 0:
+        metrics["status"] = "degraded"
+        metrics["note"] = "no valid GPS points"
+        print(f"  [03] {auv_seg}: no valid GPS — saving as-is with depth calc only")
+        df_out = df.copy()
+    else:
+        # Step 2: MAD outlier removal on lat/lon
+        if has_lat and len(df_valid) >= 4:
+            lats = df_valid["lat"].values
+            lons = df_valid["lon"].values
+            mask = mad_mask(lats, sigma) & mad_mask(lons, sigma)
+            n_removed = int((~mask).sum())
+            df_valid = df_valid[mask].copy()
+            metrics["points_removed_outlier"] = n_removed
+        else:
+            metrics["points_removed_outlier"] = 0
+
+        # Step 3: sort by timestamp
+        if "ts_ns" in df_valid.columns:
+            df_valid = df_valid.sort_values("ts_ns").reset_index(drop=True)
+
+        # Step 4: smooth lat/lon
+        if has_lat and len(df_valid) >= window:
+            df_valid["lat_smooth"] = moving_average(df_valid["lat"].values, window)
+            df_valid["lon_smooth"] = moving_average(df_valid["lon"].values, window)
+        elif has_lat and len(df_valid) > 0:
+            df_valid["lat_smooth"] = df_valid["lat"]
+            df_valid["lon_smooth"] = df_valid["lon"]
+        else:
+            df_valid["lat_smooth"] = np.nan
+            df_valid["lon_smooth"] = np.nan
+
+        df_out = df_valid
+
+    # Step 5: depth from pressure
+    if "pressure_pa" in df_out.columns and df_out["pressure_pa"].notna().any():
+        df_out["depth_m"] = (df_out["pressure_pa"] - P_ATM) / (RHO_SEA * G)
+        df_out["depth_m"] = df_out["depth_m"].abs()  # negative when underwater (P < Patm)  # surface = 0
+    else:
+        df_out["depth_m"] = np.nan
+
+    dst_dir.mkdir(parents=True, exist_ok=True)
+    out_path = dst_dir / src.name
+    df_out.to_parquet(out_path, index=False)
+
+    metrics["points_out"] = len(df_out)
+    removed_null = metrics["points_in"] - len(df_out) - metrics.get("points_removed_outlier", 0)
+    metrics["points_removed_null"] = max(0, removed_null)
+    print(f"  [03] {auv_seg}: {metrics['points_in']} → {metrics['points_out']} pts, "
+          f"depth_m range=[{df_out['depth_m'].min():.1f}, {df_out['depth_m'].max():.1f}]"
+          if df_out["depth_m"].notna().any() else
+          f"  [03] {auv_seg}: {metrics['points_in']} → {metrics['points_out']} pts, no pressure")
+    return metrics
+
+
+def filter_mission(nav_dir: Path, auv_filter: str | None = None,
+                   sigma: float = 3.0, window: int = 5) -> list[dict]:
+    out_dir = nav_dir.parent / "nav_filtered"
+
+    parquet_files = sorted(nav_dir.glob("*.parquet"))
+    if auv_filter:
+        parquet_files = [f for f in parquet_files if auv_filter in f.name]
+
+    all_metrics = []
+    init_db()
+
+    for pf in parquet_files:
+        out_file = out_dir / pf.name
+        if out_file.exists():
+            print(f"[03] {pf.stem}: cached")
+            continue
+
+        print(f"[03] Filtering {pf.name}...")
+        m = filter_parquet(pf, out_dir, sigma=sigma, window=window)
+        all_metrics.append(m)
+
+        with get_conn() as conn:
+            mission_name = nav_dir.parent.name
+            mission_row = conn.execute("SELECT id FROM missions WHERE name=?", (mission_name,)).fetchone()
+            if mission_row:
+                auv_id = pf.stem.split("_")[0]
+                job_id = upsert_job(conn, mission_row["id"], auv_id, "all", "03_nav_filter",
+                                    status="done" if m.get("status") == "ok" else m.get("status", "done"),
+                                    output_path=str(out_dir))
+                record_metric(conn, job_id, "nav_points_filtered", value=m.get("points_out", 0),
+                              pass_fail="pass" if m.get("points_out", 0) > 0 else "warn")
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 03 — Filter nav trajectories")
+    ap.add_argument("nav_dir", type=Path, help="Directory with *.parquet from stage 02")
+    ap.add_argument("--auv", type=str, default=None)
+    ap.add_argument("--sigma", type=float, default=3.0)
+    ap.add_argument("--window", type=int, default=5)
+    args = ap.parse_args()
+
+    metrics = filter_mission(args.nav_dir, auv_filter=args.auv,
+                             sigma=args.sigma, window=args.window)
+
+    print("\n=== Stage 03 summary ===")
+    for m in metrics:
+        print(f"  {m.get('file','?')}: {m.get('points_in',0)} → {m.get('points_out',0)} "
+              f"[{m.get('status','?')}]")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/03_usbl_filter.py
+++ b/pipeline/stages/03_usbl_filter.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+"""Stage 03 — Filter and smooth USBL navigation trajectory.
+
+Input:  02_usbl_raw/<AUV>_nav_raw.json (or .csv)
+Output: 03_usbl_filtered/<AUV>_nav_filtered.json + .csv
+
+Steps:
+1. Drop points with null lat/lon
+2. MAD-3σ outlier removal on lat, lon, depth independently
+3. Moving-average smoothing (window=5 by default)
+4. Optional: simple 1D Kalman on each axis (KISS — no cross-covariance)
+
+Usage:
+    python3 03_usbl_filter.py /home/cosma/cosma-pipeline/20260505-Lepradet/02_usbl_raw/
+    python3 03_usbl_filter.py /path/to/02_usbl_raw/ --auv AUV010 --sigma 2.5
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import sys
+from pathlib import Path
+
+import numpy as np
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+
+LOG_DIR = Path(os.environ.get("COSMA_PIPELINE_LOGS", "/home/cosma/cosma-pipeline/logs"))
+LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def mad_outlier_mask(arr: np.ndarray, sigma: float = 3.0) -> np.ndarray:
+    """Returns boolean mask: True = keep (inlier). Uses MAD-based sigma."""
+    if len(arr) < 4:
+        return np.ones(len(arr), dtype=bool)
+    median = np.median(arr)
+    mad = np.median(np.abs(arr - median))
+    if mad == 0:
+        return np.ones(len(arr), dtype=bool)
+    modified_z = 0.6745 * (arr - median) / mad
+    return np.abs(modified_z) < sigma
+
+
+def moving_average(arr: np.ndarray, window: int = 5) -> np.ndarray:
+    """Centered moving average with edge padding."""
+    if len(arr) < window:
+        return arr.copy()
+    pad = window // 2
+    padded = np.pad(arr, (pad, pad), mode="edge")
+    kernel = np.ones(window) / window
+    return np.convolve(padded, kernel, mode="valid")[:len(arr)]
+
+
+def simple_kalman_1d(measurements: np.ndarray,
+                     process_noise: float = 1e-4,
+                     measurement_noise: float = 1e-2) -> np.ndarray:
+    """Very simple 1D Kalman filter (scalar, no velocity state).
+    KISS: just smooths, no cross-axis coupling.
+    """
+    n = len(measurements)
+    filtered = np.zeros(n)
+    x_est = measurements[0]
+    p_est = 1.0
+
+    for i, z in enumerate(measurements):
+        # Predict
+        p_pred = p_est + process_noise
+        # Update
+        K = p_pred / (p_pred + measurement_noise)
+        x_est = x_est + K * (z - x_est)
+        p_est = (1 - K) * p_pred
+        filtered[i] = x_est
+
+    return filtered
+
+
+def filter_auv_nav(raw_path: Path, out_path: Path,
+                   sigma: float = 3.0, window: int = 5,
+                   use_kalman: bool = False) -> dict:
+    """Filter nav for one AUV. Returns metrics dict."""
+    data = json.loads(raw_path.read_text())
+    points = data.get("points", [])
+    auv_id = data.get("auv_id", raw_path.stem.replace("_nav_raw", ""))
+
+    metrics = {
+        "auv_id": auv_id,
+        "points_before": len(points),
+        "points_after": 0,
+        "points_removed_null": 0,
+        "points_removed_outlier": 0,
+        "status": "ok",
+    }
+
+    if not points:
+        metrics["status"] = "degraded"
+        metrics["note"] = "no points to filter"
+        _save_output(auv_id, [], out_path, metrics)
+        return metrics
+
+    # Step 1: Drop null lat/lon
+    valid = [p for p in points if p.get("lat") is not None and p.get("lon") is not None]
+    metrics["points_removed_null"] = len(points) - len(valid)
+
+    if not valid:
+        metrics["status"] = "degraded"
+        metrics["note"] = "all points have null lat/lon (serial-only data)"
+        print(f"  [03] {auv_id}: all null lat/lon — degraded (serial CSV source, no MCAP nav)")
+        _save_output(auv_id, [], out_path, metrics)
+        return metrics
+
+    # Step 2: MAD outlier removal
+    lats = np.array([p["lat"] for p in valid])
+    lons = np.array([p["lon"] for p in valid])
+
+    mask_lat = mad_outlier_mask(lats, sigma)
+    mask_lon = mad_outlier_mask(lons, sigma)
+    mask = mask_lat & mask_lon
+
+    # Also filter depth if present
+    depths = np.array([p.get("depth") or np.nan for p in valid])
+    if not np.all(np.isnan(depths)):
+        mask_depth = mad_outlier_mask(depths[~np.isnan(depths)], sigma)
+        # Map back — only filter where we have depth
+        depth_idx = np.where(~np.isnan(depths))[0]
+        for i, keep in zip(depth_idx, mask_depth):
+            if not keep:
+                mask[i] = False
+
+    filtered_points = [p for p, keep in zip(valid, mask) if keep]
+    metrics["points_removed_outlier"] = int(np.sum(~mask))
+
+    # Step 3: Sort by timestamp
+    filtered_points.sort(key=lambda p: p["timestamp"])
+
+    # Step 4: Smooth
+    if len(filtered_points) >= window:
+        filt_lats = moving_average(np.array([p["lat"] for p in filtered_points]), window)
+        filt_lons = moving_average(np.array([p["lon"] for p in filtered_points]), window)
+        for i, p in enumerate(filtered_points):
+            p = dict(p)
+            p["lat"] = float(filt_lats[i])
+            p["lon"] = float(filt_lons[i])
+            filtered_points[i] = p
+
+    # Step 5: Optional Kalman
+    if use_kalman and len(filtered_points) > 4:
+        k_lats = simple_kalman_1d(np.array([p["lat"] for p in filtered_points]))
+        k_lons = simple_kalman_1d(np.array([p["lon"] for p in filtered_points]))
+        for i, p in enumerate(filtered_points):
+            p = dict(p)
+            p["lat"] = float(k_lats[i])
+            p["lon"] = float(k_lons[i])
+            filtered_points[i] = p
+
+    metrics["points_after"] = len(filtered_points)
+    if metrics["points_after"] < 5:
+        metrics["status"] = "degraded"
+        metrics["note"] = f"too few points after filter: {metrics['points_after']}"
+
+    print(f"  [03] {auv_id}: {metrics['points_before']} → {metrics['points_after']} "
+          f"(removed {metrics['points_removed_null']} null, {metrics['points_removed_outlier']} outliers)")
+
+    _save_output(auv_id, filtered_points, out_path, metrics)
+    return metrics
+
+
+def _save_output(auv_id: str, points: list[dict], out_dir: Path, metrics: dict) -> None:
+    out_dir.mkdir(parents=True, exist_ok=True)
+    json_out = out_dir / f"{auv_id}_nav_filtered.json"
+    json_out.write_text(json.dumps({
+        "auv_id": auv_id,
+        "generated_at": now_iso(),
+        "metrics": metrics,
+        "points": points,
+    }, indent=2, default=str))
+
+    csv_out = out_dir / f"{auv_id}_nav_filtered.csv"
+    with open(csv_out, "w") as f:
+        f.write("timestamp,lat,lon,depth,heading,source\n")
+        for p in points:
+            f.write(f"{p['timestamp']},{p.get('lat','')},{p.get('lon','')},{p.get('depth','')},{p.get('heading','')},{p.get('source','')}\n")
+
+
+def filter_mission(raw_dir: Path, out_dir: Path | None = None,
+                   auv_filter: str | None = None,
+                   sigma: float = 3.0, window: int = 5,
+                   use_kalman: bool = False) -> list[dict]:
+    if out_dir is None:
+        out_dir = raw_dir.parent / "03_usbl_filtered"
+
+    raw_files = sorted(raw_dir.glob("*_nav_raw.json"))
+    if auv_filter:
+        raw_files = [f for f in raw_files if auv_filter in f.name]
+
+    all_metrics = []
+    init_db()
+
+    for raw_file in raw_files:
+        out_file = out_dir / raw_file.name.replace("_raw", "_filtered")
+        if out_file.exists():
+            print(f"[03] {raw_file.stem}: output exists, skipping")
+            existing = json.loads(out_file.read_text())
+            all_metrics.append(existing.get("metrics", {}))
+            continue
+
+        print(f"[03] Filtering {raw_file.name} ...")
+        m = filter_auv_nav(raw_file, out_dir, sigma=sigma, window=window, use_kalman=use_kalman)
+        all_metrics.append(m)
+
+        # DB record
+        with get_conn() as conn:
+            mission_name = raw_dir.parent.name
+            mission_row = conn.execute("SELECT id FROM missions WHERE name=?", (mission_name,)).fetchone()
+            if mission_row:
+                job_id = upsert_job(conn, mission_row["id"], m["auv_id"], "all", "03_usbl_filter",
+                                    status="done" if m["status"] == "ok" else m["status"],
+                                    output_path=str(out_dir))
+                record_metric(conn, job_id, "usbl_points_before", value=m.get("points_before", 0))
+                record_metric(conn, job_id, "usbl_points_after", value=m.get("points_after", 0),
+                              pass_fail="pass" if m.get("points_after", 0) >= 5 else "fail")
+                record_metric(conn, job_id, "usbl_points_removed_outlier",
+                              value=m.get("points_removed_outlier", 0))
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 03 — Filter USBL navigation")
+    ap.add_argument("raw_dir", type=Path, help="Directory with *_nav_raw.json files")
+    ap.add_argument("--out", type=Path, default=None)
+    ap.add_argument("--auv", type=str, default=None)
+    ap.add_argument("--sigma", type=float, default=3.0, help="MAD sigma threshold")
+    ap.add_argument("--window", type=int, default=5, help="Moving average window")
+    ap.add_argument("--kalman", action="store_true", help="Apply simple Kalman smoothing")
+    args = ap.parse_args()
+
+    metrics = filter_mission(args.raw_dir, out_dir=args.out, auv_filter=args.auv,
+                             sigma=args.sigma, window=args.window, use_kalman=args.kalman)
+
+    print("\n=== Stage 03 summary ===")
+    for m in metrics:
+        print(f"  {m.get('auv_id','?')}: {m.get('points_before',0)} → {m.get('points_after',0)} [{m.get('status','?')}]")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/03b_trim_runs.py
+++ b/pipeline/stages/03b_trim_runs.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""Stage 03b - Trim videos per run (LRV proxies + -c copy, fast).
+
+Inputs:
+  data/<MISSION>/02_runs.json
+  data/<MISSION>/03_video_index.json
+
+Strategy:
+  - Use GoPro LRV proxy files (768x432 H.264 ~720 kbps) instead of 4K HEVC originals.
+  - ffmpeg -c copy per chapter (keyframe-aligned cut) + concat demuxer.
+  - Output: per-run .mp4 + ours_<gp>.mp4 (concat of per-run).
+
+Falls back to MP4 source if matching LRV is missing.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from collections import defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+OUT_ROOT = Path("/mnt/ssd/cosma-qc-out/03b_trim_runs")
+
+
+def run_ff(cmd: list[str]) -> None:
+    r = subprocess.run(cmd, capture_output=True, text=True)
+    if r.returncode != 0:
+        sys.stderr.write(" ".join(cmd) + "\n")
+        sys.stderr.write(r.stderr[-3000:] + "\n")
+        raise RuntimeError(f"ffmpeg failed rc={r.returncode}")
+
+
+def lrv_for_chapter(mp4_path: Path) -> Path | None:
+    """Return matching .LRV path if it exists (GoPro low-res proxy)."""
+    name = mp4_path.name
+    if not name.startswith("GX") or not name.upper().endswith(".MP4"):
+        return None
+    lrv_name = "GL" + name[2:-4] + ".LRV"
+    p = mp4_path.parent / lrv_name
+    return p if p.exists() else None
+
+
+def overlap_clips(run_start: float, run_end: float, chapters: list[dict]) -> list[tuple[dict, float, float]]:
+    """Return [(chapter, start_off_s, duration_s)] for chapters overlapping the run."""
+    out = []
+    for ch in sorted(chapters, key=lambda c: c["start_epoch"]):
+        a = max(run_start, ch["start_epoch"])
+        b = min(run_end, ch["end_epoch"])
+        if b - a <= 1.0:
+            continue
+        start_off = max(0.0, run_start - ch["start_epoch"])
+        dur = b - a
+        out.append((ch, start_off, dur))
+    return out
+
+
+def cut_clip(src: Path, start_off: float, duration: float, dst: Path) -> None:
+    """Cut [start_off, start_off+duration] from src using -c copy (keyframe-aligned)."""
+    cmd = [
+        "ffmpeg", "-y", "-loglevel", "error",
+        "-ss", f"{start_off:.3f}",
+        "-i", str(src),
+        "-t", f"{duration:.3f}",
+        "-c", "copy",
+        "-avoid_negative_ts", "make_zero",
+        "-an",
+        str(dst),
+    ]
+    run_ff(cmd)
+
+
+def concat_demux(parts: list[Path], dst: Path) -> None:
+    """Concat parts with ffmpeg concat demuxer (-c copy)."""
+    if not parts:
+        return
+    if len(parts) == 1:
+        shutil.copy2(parts[0], dst)
+        return
+    with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
+        for p in parts:
+            f.write(f"file '{p.resolve()}'\n")
+        listfile = f.name
+    try:
+        cmd = [
+            "ffmpeg", "-y", "-loglevel", "error",
+            "-f", "concat", "-safe", "0",
+            "-i", listfile,
+            "-c", "copy",
+            "-movflags", "+faststart",
+            "-an",
+            str(dst),
+        ]
+        run_ff(cmd)
+    finally:
+        os.unlink(listfile)
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--mission", required=True)
+    ap.add_argument("--data-root", default="/home/cosma/cosma-qc/data")
+    ap.add_argument("--skip-existing", action="store_true")
+    ap.add_argument("--prefer-source", choices=["lrv", "mp4"], default="lrv",
+                    help="lrv = use .LRV proxy (default, fast); mp4 = use 4K originals")
+    args = ap.parse_args()
+
+    mission = args.mission
+    data_dir = Path(args.data_root) / mission
+
+    runs = json.loads((data_dir / "02_runs.json").read_text())["runs"]
+    vidx = json.loads((data_dir / "03_video_index.json").read_text())
+    videos = vidx["videos"]
+
+    by_auv_gp: dict[tuple[str, str], list[dict]] = defaultdict(list)
+    for v in videos:
+        by_auv_gp[(v["auv"], v["gp"])].append(v)
+    all_gps = sorted({v["gp"] for v in videos})
+
+    out_dir = OUT_ROOT / mission
+    out_dir.mkdir(parents=True, exist_ok=True)
+    tmp_dir = out_dir / "_tmp"
+    tmp_dir.mkdir(exist_ok=True)
+
+    link = data_dir / "03b_trim_runs"
+    if link.is_symlink():
+        link.unlink()
+    elif link.exists():
+        shutil.rmtree(link)
+    link.symlink_to(out_dir)
+
+    manifest = {
+        "mission": mission,
+        "generated_at": datetime.now(timezone.utc).isoformat(),
+        "output_root": str(out_dir),
+        "source": args.prefer_source,
+        "runs": [],
+        "ours": {},
+    }
+
+    runs_by_chrono = sorted(runs, key=lambda r: r["start_epoch"])
+    ours_parts: dict[str, list[tuple[float, Path, str]]] = defaultdict(list)
+
+    for run in runs_by_chrono:
+        run_id = run["run_id"]
+        auv = run["auv"]
+        r_start = run["start_epoch"]
+        r_end = run["end_epoch"]
+        run_entry = {"run_id": run_id, "auv": auv, "duration_s": run["duration_s"], "outputs": []}
+
+        for gp in all_gps:
+            chapters = by_auv_gp.get((auv, gp), [])
+            if not chapters:
+                continue
+            clips = overlap_clips(r_start, r_end, chapters)
+            if not clips:
+                continue
+
+            out_name = f"{run_id}_{auv}_{gp}.mp4"
+            out_path = out_dir / out_name
+            if args.skip_existing and out_path.exists() and out_path.stat().st_size > 0:
+                print(f"[skip] {out_name}", flush=True)
+            else:
+                # Pick source per chapter
+                resolved: list[tuple[Path, float, float, str]] = []
+                src_tags: list[str] = []
+                for ch, soff, dur in clips:
+                    mp4 = Path(ch["filepath"])
+                    src = mp4
+                    tag = "mp4"
+                    if args.prefer_source == "lrv":
+                        lrv = lrv_for_chapter(mp4)
+                        if lrv:
+                            src = lrv
+                            tag = "lrv"
+                    resolved.append((src, soff, dur, tag))
+                    src_tags.append(tag)
+
+                print(
+                    f"[cut ] {out_name}  chapters={len(resolved)}  src={','.join(src_tags)}",
+                    flush=True,
+                )
+                tmp_parts: list[Path] = []
+                for i, (src, soff, dur, _) in enumerate(resolved):
+                    tp = tmp_dir / f"{run_id}_{auv}_{gp}_p{i:02d}.mp4"
+                    cut_clip(src, soff, dur, tp)
+                    tmp_parts.append(tp)
+                concat_demux(tmp_parts, out_path)
+                for p in tmp_parts:
+                    p.unlink(missing_ok=True)
+
+            sz_mb = round(out_path.stat().st_size / 1024 / 1024, 1)
+            run_entry["outputs"].append({"gp": gp, "file": out_name, "size_mb": sz_mb})
+            ours_parts[gp].append((r_start, out_path, f"{run_id} {auv}"))
+
+        manifest["runs"].append(run_entry)
+
+    for gp, parts in ours_parts.items():
+        parts.sort()
+        ordered_paths = [p for _, p, _ in parts]
+        ours_path = out_dir / f"ours_{gp}.mp4"
+        print(f"[ours] {ours_path.name} <- {len(ordered_paths)} clip(s)", flush=True)
+        concat_demux(ordered_paths, ours_path)
+        sz_mb = round(ours_path.stat().st_size / 1024 / 1024, 1)
+        manifest["ours"][gp] = {
+            "file": ours_path.name,
+            "size_mb": sz_mb,
+            "segments": [lbl for _, _, lbl in parts],
+        }
+
+    (data_dir / "03b_trim_runs.json").write_text(json.dumps(manifest, indent=2))
+    print(f"\n[done] manifest: {data_dir / '03b_trim_runs.json'}", flush=True)
+    print(f"[done] outputs:  {out_dir}", flush=True)
+
+    try:
+        tmp_dir.rmdir()
+    except OSError:
+        pass
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/04_frame_extract.py
+++ b/pipeline/stages/04_frame_extract.py
@@ -0,0 +1,341 @@
+#!/usr/bin/env python3
+"""Stage 04 — Extract frames from GoPro videos.
+
+For each MP4 in /mnt/ssd/<mission>/raw_data/medias/videos/GP*-AUV*/
+- Skip files < 2MB (placeholders)
+- Auto-trim hors-eau: sample frames at start/end, detect non-blue/green pixels
+- ffmpeg fps=1, scale=518:294, q:v=3
+- Output: ~/cosma-pipeline/data/<mission>/frames/<AUV>/<segment>/frame_XXXXX.jpg
+- Skip if output dir exists and has >= expected frames
+- Log to SQLite state.db
+
+Usage:
+    python3 04_frame_extract.py --mission 20260505-Lepradet
+    python3 04_frame_extract.py --video /mnt/ssd/.../GP1-AUV210/GX019837.MP4
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+import cv2
+import numpy as np
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+from lib_frame_qc import score_image_file, aggregate as qc_aggregate
+
+QC_SAMPLE_RATE = int(os.environ.get("COSMA_QC_SAMPLE_RATE", "5"))
+QC_BOTTOM_OK_PCT = float(os.environ.get("COSMA_QC_BOTTOM_OK_PCT", "50"))
+
+PIPELINE_BASE = Path(os.environ.get("COSMA_PIPELINE_BASE", "/home/cosma/cosma-pipeline"))
+SSD_BASE = Path(os.environ.get("COSMA_SSD_BASE", "/mnt/ssd"))
+MIN_VIDEO_SIZE_MB = 2.0
+
+
+def is_underwater_frame(frame_bgr: np.ndarray, threshold: float = 0.6) -> bool:
+    """Return True if frame looks like underwater footage (dominant blue/green).
+    Hors-eau: R > G-5 AND R > B-5 (dry/air dominant).
+    Underwater: blue or green channel dominant.
+    """
+    b, g, r = cv2.split(frame_bgr.astype(np.float32))
+    mean_r = float(np.mean(r))
+    mean_g = float(np.mean(g))
+    mean_b = float(np.mean(b))
+    # Not underwater: red dominates
+    if mean_r > mean_g - 5 and mean_r > mean_b - 5:
+        return False
+    return True
+
+
+def detect_water_range(video_path: Path, sample_count: int = 10) -> tuple[float, float]:
+    """Detect start/end times of underwater portion by sampling frames.
+    Returns (start_s, end_s). Returns (0, duration) if uncertain.
+    """
+    cap = cv2.VideoCapture(str(video_path))
+    if not cap.isOpened():
+        return 0.0, -1.0
+
+    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration_s = total_frames / fps if fps > 0 else 0
+
+    # Sample frames: first 20% and last 20%
+    probe_times_start = [duration_s * i / (sample_count * 5) for i in range(sample_count)]
+    probe_times_end = [duration_s * (1 - i / (sample_count * 5)) for i in range(sample_count)]
+
+    # Find first underwater frame from start
+    start_s = 0.0
+    for t in probe_times_start:
+        cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
+        ret, frame = cap.read()
+        if ret and is_underwater_frame(frame):
+            start_s = max(0.0, t - 2.0)
+            break
+
+    # Find last underwater frame from end
+    end_s = duration_s
+    for t in sorted(probe_times_end, reverse=True):
+        cap.set(cv2.CAP_PROP_POS_MSEC, t * 1000)
+        ret, frame = cap.read()
+        if ret and is_underwater_frame(frame):
+            end_s = min(duration_s, t + 2.0)
+            break
+
+    cap.release()
+    return start_s, end_s
+
+
+def get_video_duration(video_path: Path) -> float:
+    """Get video duration in seconds via ffprobe."""
+    cmd = [
+        "ffprobe", "-v", "quiet", "-print_format", "json",
+        "-show_streams", str(video_path)
+    ]
+    try:
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+        data = json.loads(r.stdout)
+        for stream in data.get("streams", []):
+            dur = float(stream.get("duration", 0))
+            if dur > 0:
+                return dur
+    except Exception:
+        pass
+    return 0.0
+
+
+def extract_frames(video_path: Path, out_dir: Path, fps: int = 1,
+                   scale: str = "518:294", quality: int = 3,
+                   start_s: float = 0.0, end_s: float = -1.0) -> dict:
+    """Run ffmpeg to extract frames. Returns metrics dict."""
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    # Build ffmpeg args
+    cmd = ["ffmpeg", "-y", "-loglevel", "error"]
+    cmd += ["-ss", str(start_s), "-i", str(video_path)]
+    if end_s > 0 and end_s > start_s:
+        cmd += ["-t", str(end_s - start_s)]
+    cmd += [
+        "-vf", f"fps={fps},scale={scale}",
+        "-q:v", str(quality),
+        str(out_dir / "frame_%05d.jpg"),
+    ]
+
+    t0 = time.time()
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+    elapsed = time.time() - t0
+
+    frames = sorted(out_dir.glob("frame_*.jpg"))
+    n_frames = len(frames)
+
+    metrics = {
+        "video": str(video_path),
+        "out_dir": str(out_dir),
+        "n_frames": n_frames,
+        "elapsed_s": round(elapsed, 1),
+        "returncode": result.returncode,
+        "start_s": start_s,
+        "end_s": end_s,
+    }
+    if result.returncode != 0:
+        metrics["error"] = result.stderr[-500:]
+        print(f"  [04] ffmpeg error for {video_path.name}: {result.stderr[-200:]}")
+    else:
+        print(f"  [04] {video_path.name}: {n_frames} frames in {elapsed:.1f}s")
+        # Score a subsample for QC
+        qc = qc_segment(out_dir, sample_rate=QC_SAMPLE_RATE)
+        if qc:
+            metrics.update(qc)
+
+    return metrics
+
+
+def qc_segment(frames_dir: Path, sample_rate: int = 5) -> dict | None:
+    """Sample 1/sample_rate frames, score each, write qc.json, return aggregate."""
+    frames = sorted(frames_dir.glob("frame_*.jpg"))
+    if not frames:
+        return None
+    sampled = frames[::max(1, sample_rate)]
+    per_frame = []
+    for f in sampled:
+        s = score_image_file(f)
+        if s is not None:
+            per_frame.append(s)
+    if not per_frame:
+        return None
+    agg = qc_aggregate(per_frame)
+    qc_payload = {
+        "frames_in_dir": len(frames),
+        "frames_sampled": len(per_frame),
+        "sample_rate": sample_rate,
+        **agg,
+        "per_frame": per_frame,
+    }
+    try:
+        (frames_dir / "qc.json").write_text(json.dumps(qc_payload, indent=2))
+    except Exception as e:
+        print(f"  [04] qc.json write failed: {e}")
+    print(
+        f"  [04]   QC: bottom_visible={agg['bottom_visible_pct']}% "
+        f"(b={agg['frames_bottom_visible']} ooo={agg['frames_out_of_water']} "
+        f"turb={agg['frames_turbid']} nob={agg['frames_water_no_bottom']})"
+    )
+    return agg
+
+
+def process_video(video_path: Path, auv_id: str, mission_name: str) -> dict:
+    """Process one video file. Returns metrics."""
+    size_mb = video_path.stat().st_size / (1024 * 1024)
+    if size_mb < MIN_VIDEO_SIZE_MB:
+        print(f"  [04] Skip {video_path.name} ({size_mb:.1f}MB < {MIN_VIDEO_SIZE_MB}MB)")
+        return {"video": str(video_path), "skipped": True, "reason": "placeholder"}
+
+    segment = video_path.stem
+    out_dir = PIPELINE_BASE / "data" / mission_name / "frames" / auv_id / segment
+
+    # Check if already done
+    if out_dir.exists():
+        existing = list(out_dir.glob("frame_*.jpg"))
+        duration_s = get_video_duration(video_path)
+        expected = max(1, int(duration_s) - 10) if duration_s > 0 else 1
+        if len(existing) >= expected:
+            print(f"  [04] {video_path.name}: already done ({len(existing)} frames), skip")
+            cached_m: dict = {"video": str(video_path), "n_frames": len(existing), "cached": True,
+                              "out_dir": str(out_dir)}
+            # Re-run QC if qc.json is missing (idempotent enrichment)
+            if not (out_dir / "qc.json").exists():
+                qc = qc_segment(out_dir, sample_rate=QC_SAMPLE_RATE)
+                if qc:
+                    cached_m.update(qc)
+            else:
+                try:
+                    cached_qc = json.loads((out_dir / "qc.json").read_text())
+                    for k in (
+                        "frames_total", "frames_bottom_visible", "frames_out_of_water",
+                        "frames_turbid", "frames_water_no_bottom", "bottom_visible_pct",
+                    ):
+                        if k in cached_qc:
+                            cached_m[k] = cached_qc[k]
+                except Exception:
+                    pass
+            return cached_m
+
+    print(f"  [04] {video_path.name} ({size_mb:.0f}MB): detecting water range...")
+    start_s, end_s = detect_water_range(video_path)
+    print(f"  [04]   water range: {start_s:.1f}s → {end_s:.1f}s")
+
+    return extract_frames(video_path, out_dir, start_s=start_s, end_s=end_s)
+
+
+def find_auv_videos(mission_path: Path) -> dict[str, list[Path]]:
+    """Find all MP4 files per AUV in medias/videos/GP*-AUV*/."""
+    videos_root = mission_path / "raw_data/medias/videos"
+    result: dict[str, list[Path]] = {}
+
+    for gopro_dir in sorted(videos_root.glob("GP*-AUV*")):
+        # Extract AUV ID from dir name: GP1-AUV210 -> AUV210
+        parts = gopro_dir.name.split("-")
+        if len(parts) >= 2:
+            auv_id = parts[1]
+            mp4_files = [f for f in sorted(gopro_dir.glob("GX*.MP4"))
+                         if f.stat().st_size / (1024 * 1024) >= MIN_VIDEO_SIZE_MB]
+            if mp4_files:
+                if auv_id not in result:
+                    result[auv_id] = []
+                result[auv_id].extend(mp4_files)
+
+    return result
+
+
+def process_mission(mission_name: str) -> list[dict]:
+    mission_path = SSD_BASE / mission_name
+    auv_videos = find_auv_videos(mission_path)
+    print(f"[04] Found AUVs: {list(auv_videos.keys())}")
+
+    all_metrics = []
+    init_db()
+
+    for auv_id, videos in sorted(auv_videos.items()):
+        print(f"[04] === {auv_id}: {len(videos)} videos ===")
+        for video_path in videos:
+            t0 = time.time()
+            m = process_video(video_path, auv_id, mission_name)
+            m["auv_id"] = auv_id
+            all_metrics.append(m)
+
+            if not m.get("skipped"):
+                with get_conn() as conn:
+                    mission_row = conn.execute(
+                        "SELECT id FROM missions WHERE name=?", (mission_name,)
+                    ).fetchone()
+                    if mission_row:
+                        bottom_pct = m.get("bottom_visible_pct")
+                        if m.get("returncode", 0) != 0:
+                            job_status = "error"
+                        elif bottom_pct is not None and bottom_pct < QC_BOTTOM_OK_PCT:
+                            job_status = "degraded"
+                        else:
+                            job_status = "done"
+                        job_id = upsert_job(
+                            conn, mission_row["id"], auv_id,
+                            video_path.stem, "04_frame_extract",
+                            status=job_status,
+                            output_path=m.get("out_dir", ""),
+                            error_msg=(
+                                f"bottom_visible_pct={bottom_pct}% <{QC_BOTTOM_OK_PCT}%"
+                                if job_status == "degraded" else None
+                            ),
+                        )
+                        if not m.get("cached"):
+                            record_metric(conn, job_id, "frames_extracted",
+                                          value=m.get("n_frames", 0),
+                                          pass_fail="pass" if m.get("n_frames", 0) > 0 else "fail")
+                            record_metric(conn, job_id, "extract_time_s",
+                                          value=m.get("elapsed_s", 0))
+                        # Always record QC metrics (so cached frames also get scored history)
+                        for k in (
+                            "frames_total", "frames_bottom_visible", "frames_out_of_water",
+                            "frames_turbid", "frames_water_no_bottom",
+                        ):
+                            if k in m:
+                                record_metric(conn, job_id, k, value=float(m[k]))
+                        if bottom_pct is not None:
+                            record_metric(
+                                conn, job_id, "bottom_visible_pct",
+                                value=float(bottom_pct),
+                                pass_fail="pass" if bottom_pct >= QC_BOTTOM_OK_PCT else "degraded",
+                            )
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 04 — Extract frames from GoPro videos")
+    ap.add_argument("--mission", type=str, help="Mission name (e.g. 20260505-Lepradet)")
+    ap.add_argument("--video", type=Path, help="Single video path")
+    ap.add_argument("--auv", type=str, default="UNKNOWN", help="AUV ID for single video mode")
+    args = ap.parse_args()
+
+    if args.video:
+        mission_name = args.mission or "unknown"
+        m = process_video(args.video, args.auv, mission_name)
+        print(f"\nResult: {m}")
+    elif args.mission:
+        metrics = process_mission(args.mission)
+        print("\n=== Stage 04 summary ===")
+        total_frames = sum(m.get("n_frames", 0) for m in metrics if not m.get("skipped"))
+        skipped = sum(1 for m in metrics if m.get("skipped"))
+        print(f"Total frames: {total_frames}, skipped: {skipped}")
+    else:
+        ap.print_help()
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/04b_trim_water.py
+++ b/pipeline/stages/04b_trim_water.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python3
+"""Stage 04b — Trim out-of-water (hors-eau) head/tail frames from already-extracted segments.
+
+Ports the sustained-run trim logic from cosma-qc/scripts/dispatcher.py (_AUTO_TRIM_SCRIPT,
+trim_above_water_prefix) into the new cosma-pipeline pipeline. Re-runs frame QC scoring
+on the trimmed set and updates state.db (jobs.status + metrics).
+
+Usage:
+    python3 04b_trim_water.py --mission 20260505-Lepradet
+    python3 04b_trim_water.py --mission 20260505-Lepradet --auv AUV210 --segment GX019837
+    python3 04b_trim_water.py --mission 20260505-Lepradet --dry-run
+
+Safety:
+    - Skips segments where ffmpeg is still running on the frames dir (extraction in progress).
+    - Skips segments with a queued/running 05_inference job in state.db.
+    - Skips segments whose frame count is not stable over a 5s window.
+    - Never deletes all frames (sanity floor: keep everything if trim would empty the dir).
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+import cv2
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+sys.path.insert(0, str(Path(__file__).parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+from lib_frame_qc import score_image_file, aggregate as qc_aggregate
+
+PIPELINE_BASE = Path(os.environ.get("COSMA_PIPELINE_BASE", "/home/cosma/cosma-pipeline"))
+QC_SAMPLE_RATE = int(os.environ.get("COSMA_QC_SAMPLE_RATE", "5"))
+QC_BOTTOM_OK_PCT = float(os.environ.get("COSMA_QC_BOTTOM_OK_PCT", "50"))
+NEED_STREAK = 10  # consecutive underwater frames required to lock start/end
+
+
+# -----------------------------------------------------------------------------
+# Trim logic (ported verbatim from dispatcher._AUTO_TRIM_SCRIPT)
+# -----------------------------------------------------------------------------
+
+def is_underwater(path: Path) -> bool | None:
+    img = cv2.imread(str(path), cv2.IMREAD_REDUCED_COLOR_4)
+    if img is None:
+        return None
+    b, g, r = [float(c) for c in cv2.mean(img)[:3]]
+    # Red is absorbed by water → R < G AND R < B on underwater shots.
+    return r < g - 5 and r < b - 5
+
+
+def trim_segment(frames_dir: Path, dry_run: bool = False) -> tuple[int, int, int]:
+    """Delete leading and trailing out-of-water frames.
+    Returns (head_removed, tail_removed, remaining).
+    """
+    paths = sorted(frames_dir.glob("frame_*.jpg"))
+    if not paths:
+        return (0, 0, 0)
+
+    # Scan from start
+    start = 0
+    streak = 0
+    for i, p in enumerate(paths):
+        uw = is_underwater(p)
+        if uw is None:
+            continue
+        if uw:
+            streak += 1
+            if streak >= NEED_STREAK:
+                start = i - NEED_STREAK + 1
+                break
+        else:
+            streak = 0
+
+    # Scan from end
+    end = len(paths)
+    streak = 0
+    for j in range(len(paths) - 1, -1, -1):
+        uw = is_underwater(paths[j])
+        if uw is None:
+            continue
+        if uw:
+            streak += 1
+            if streak >= NEED_STREAK:
+                end = j + NEED_STREAK  # exclusive
+                break
+        else:
+            streak = 0
+
+    if end <= start:
+        # Sanity: never delete everything.
+        start = 0
+        end = len(paths)
+
+    removed_head = start
+    removed_tail = len(paths) - end
+
+    if not dry_run:
+        for p in paths[:start]:
+            try:
+                p.unlink()
+            except OSError:
+                pass
+        for p in paths[end:]:
+            try:
+                p.unlink()
+            except OSError:
+                pass
+
+    return (removed_head, removed_tail, end - start)
+
+
+# -----------------------------------------------------------------------------
+# Safety: is this segment currently being touched?
+# -----------------------------------------------------------------------------
+
+def has_ffmpeg_running_on(frames_dir: Path) -> bool:
+    """Check if any ffmpeg process is writing into frames_dir."""
+    try:
+        r = subprocess.run(
+            ["pgrep", "-af", "ffmpeg"], capture_output=True, text=True, timeout=5
+        )
+        for line in r.stdout.splitlines():
+            if str(frames_dir) in line:
+                return True
+    except Exception:
+        pass
+    return False
+
+
+def has_inference_running_on(frames_dir: Path) -> bool:
+    """Check if any 05_inference.py process is running on frames_dir."""
+    try:
+        r = subprocess.run(
+            ["pgrep", "-af", "05_inference"], capture_output=True, text=True, timeout=5
+        )
+        for line in r.stdout.splitlines():
+            if str(frames_dir) in line:
+                return True
+    except Exception:
+        pass
+    return False
+
+
+def has_pending_inference_job(conn, mission_id: int, auv_id: str, segment: str) -> bool:
+    """Check state.db for queued/running 05_inference job on this segment."""
+    row = conn.execute(
+        "SELECT status FROM jobs WHERE mission_id=? AND auv_id=? "
+        "AND segment_label=? AND stage='05_inference'",
+        (mission_id, auv_id, segment),
+    ).fetchone()
+    if row is None:
+        return False
+    return row["status"] in ("queued", "running")
+
+
+def frame_count_is_stable(frames_dir: Path, wait_s: float = 5.0) -> bool:
+    """Return True if the frame count doesn't change over wait_s."""
+    n1 = sum(1 for _ in frames_dir.glob("frame_*.jpg"))
+    time.sleep(wait_s)
+    n2 = sum(1 for _ in frames_dir.glob("frame_*.jpg"))
+    return n1 == n2
+
+
+# -----------------------------------------------------------------------------
+# QC re-scoring (mirrors stage 04 qc_segment)
+# -----------------------------------------------------------------------------
+
+def qc_segment(frames_dir: Path, sample_rate: int = QC_SAMPLE_RATE) -> dict | None:
+    frames = sorted(frames_dir.glob("frame_*.jpg"))
+    if not frames:
+        return None
+    sampled = frames[::max(1, sample_rate)]
+    per_frame = []
+    for f in sampled:
+        s = score_image_file(f)
+        if s is not None:
+            per_frame.append(s)
+    if not per_frame:
+        return None
+    agg = qc_aggregate(per_frame)
+    qc_payload = {
+        "frames_in_dir": len(frames),
+        "frames_sampled": len(per_frame),
+        "sample_rate": sample_rate,
+        **agg,
+        "per_frame": per_frame,
+    }
+    try:
+        (frames_dir / "qc.json").write_text(json.dumps(qc_payload, indent=2))
+    except Exception as e:
+        print(f"  [04b] qc.json write failed: {e}")
+    return agg
+
+
+# -----------------------------------------------------------------------------
+# Main per-segment driver
+# -----------------------------------------------------------------------------
+
+def process_segment(mission_name: str, auv_id: str, segment: str,
+                    frames_dir: Path, dry_run: bool, conn) -> dict:
+    result = {
+        "auv_id": auv_id,
+        "segment": segment,
+        "frames_dir": str(frames_dir),
+        "skipped": False,
+        "head_removed": 0,
+        "tail_removed": 0,
+        "remaining": 0,
+        "before_total": 0,
+        "before_bottom_pct": None,
+        "after_bottom_pct": None,
+        "status_before": None,
+        "status_after": None,
+    }
+
+    if not frames_dir.is_dir():
+        result["skipped"] = True
+        result["reason"] = "no_frames_dir"
+        return result
+
+    # Safety checks
+    if has_ffmpeg_running_on(frames_dir):
+        result["skipped"] = True
+        result["reason"] = "ffmpeg_running"
+        print(f"  [04b] SKIP {auv_id}/{segment}: ffmpeg still extracting")
+        return result
+
+    if has_inference_running_on(frames_dir):
+        result["skipped"] = True
+        result["reason"] = "inference_running_proc"
+        print(f"  [04b] SKIP {auv_id}/{segment}: 05_inference process running")
+        return result
+
+    # Look up mission_id + current 04 job
+    mission_row = conn.execute(
+        "SELECT id FROM missions WHERE name=?", (mission_name,)
+    ).fetchone()
+    if not mission_row:
+        result["skipped"] = True
+        result["reason"] = "mission_not_in_db"
+        return result
+    mission_id = mission_row["id"]
+
+    if has_pending_inference_job(conn, mission_id, auv_id, segment):
+        result["skipped"] = True
+        result["reason"] = "inference_job_pending"
+        print(f"  [04b] SKIP {auv_id}/{segment}: 05_inference queued/running in DB")
+        return result
+
+    if not frame_count_is_stable(frames_dir, wait_s=5.0):
+        result["skipped"] = True
+        result["reason"] = "frame_count_unstable"
+        print(f"  [04b] SKIP {auv_id}/{segment}: frame count not stable")
+        return result
+
+    # Snapshot before
+    before_paths = sorted(frames_dir.glob("frame_*.jpg"))
+    result["before_total"] = len(before_paths)
+    job04_row = conn.execute(
+        "SELECT id, status FROM jobs WHERE mission_id=? AND auv_id=? "
+        "AND segment_label=? AND stage='04_frame_extract'",
+        (mission_id, auv_id, segment),
+    ).fetchone()
+    if job04_row is None:
+        result["skipped"] = True
+        result["reason"] = "no_04_job_in_db"
+        print(f"  [04b] SKIP {auv_id}/{segment}: no 04 job row")
+        return result
+    result["status_before"] = job04_row["status"]
+
+    # Read current QC if available
+    qc_path = frames_dir / "qc.json"
+    if qc_path.exists():
+        try:
+            result["before_bottom_pct"] = json.loads(qc_path.read_text()).get("bottom_visible_pct")
+        except Exception:
+            pass
+
+    # Trim
+    head, tail, remaining = trim_segment(frames_dir, dry_run=dry_run)
+    result["head_removed"] = head
+    result["tail_removed"] = tail
+    result["remaining"] = remaining
+
+    # Re-QC if not dry-run and something was trimmed (or always to keep metrics fresh)
+    after_agg = None
+    if not dry_run and (head > 0 or tail > 0):
+        after_agg = qc_segment(frames_dir)
+    elif dry_run:
+        # In dry-run, don't touch qc.json; compute aggregate from remaining slice in-memory
+        remaining_paths = sorted(frames_dir.glob("frame_*.jpg"))[head: len(before_paths) - tail]
+        sampled = remaining_paths[::max(1, QC_SAMPLE_RATE)]
+        per_frame = [s for s in (score_image_file(f) for f in sampled) if s is not None]
+        if per_frame:
+            after_agg = qc_aggregate(per_frame)
+
+    if after_agg is not None:
+        result["after_bottom_pct"] = after_agg.get("bottom_visible_pct")
+
+    if dry_run:
+        print(
+            f"  [04b] DRY {auv_id}/{segment}: head={head} tail={tail} "
+            f"remaining={remaining} (before={len(before_paths)}, "
+            f"bottom_pct {result['before_bottom_pct']}→{result['after_bottom_pct']})"
+        )
+        return result
+
+    # Update DB: job row + metrics
+    job_id = job04_row["id"]
+    bottom_pct = after_agg.get("bottom_visible_pct") if after_agg else None
+
+    if bottom_pct is not None and bottom_pct >= QC_BOTTOM_OK_PCT:
+        new_status = "done"
+        err_msg = None
+    elif bottom_pct is not None:
+        new_status = "degraded"
+        err_msg = f"bottom_visible_pct={bottom_pct}% <{QC_BOTTOM_OK_PCT}% (after trim)"
+    else:
+        new_status = job04_row["status"]
+        err_msg = None
+
+    upsert_job(
+        conn, mission_id, auv_id, segment, "04_frame_extract",
+        status=new_status,
+        output_path=str(frames_dir),
+        error_msg=err_msg,
+    )
+    record_metric(conn, job_id, "trimmed_head", value=float(head))
+    record_metric(conn, job_id, "trimmed_tail", value=float(tail))
+    record_metric(conn, job_id, "frames_after_trim", value=float(remaining))
+    if after_agg:
+        for k in (
+            "frames_total", "frames_bottom_visible", "frames_out_of_water",
+            "frames_turbid", "frames_water_no_bottom",
+        ):
+            if k in after_agg:
+                record_metric(conn, job_id, k, value=float(after_agg[k]))
+        if bottom_pct is not None:
+            record_metric(
+                conn, job_id, "bottom_visible_pct",
+                value=float(bottom_pct),
+                pass_fail="pass" if bottom_pct >= QC_BOTTOM_OK_PCT else "degraded",
+            )
+
+    result["status_after"] = new_status
+    print(
+        f"  [04b] {auv_id}/{segment}: trimmed head={head} tail={tail} "
+        f"remaining={remaining}, bottom_pct={bottom_pct}% ({result['status_before']}→{new_status})"
+    )
+    return result
+
+
+# -----------------------------------------------------------------------------
+# Discovery + CLI
+# -----------------------------------------------------------------------------
+
+def find_segments(mission_name: str, auv_filter: str | None,
+                  segment_filter: str | None) -> list[tuple[str, str, Path]]:
+    base = PIPELINE_BASE / "data" / mission_name / "frames"
+    out: list[tuple[str, str, Path]] = []
+    if not base.is_dir():
+        return out
+    for auv_dir in sorted(base.iterdir()):
+        if not auv_dir.is_dir():
+            continue
+        if auv_filter and auv_dir.name != auv_filter:
+            continue
+        for seg_dir in sorted(auv_dir.iterdir()):
+            if not seg_dir.is_dir():
+                continue
+            if segment_filter and seg_dir.name != segment_filter:
+                continue
+            out.append((auv_dir.name, seg_dir.name, seg_dir))
+    return out
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 04b — Trim hors-eau head/tail frames")
+    ap.add_argument("--mission", default="20260505-Lepradet")
+    ap.add_argument("--auv")
+    ap.add_argument("--segment")
+    ap.add_argument("--dry-run", action="store_true")
+    args = ap.parse_args()
+
+    init_db()
+
+    segments = find_segments(args.mission, args.auv, args.segment)
+    if not segments:
+        print(f"[04b] No segments found under {args.mission}")
+        sys.exit(1)
+
+    print(f"[04b] Mission={args.mission}  segments={len(segments)}  dry_run={args.dry_run}")
+    results: list[dict] = []
+    with get_conn() as conn:
+        for auv_id, segment, frames_dir in segments:
+            try:
+                r = process_segment(args.mission, auv_id, segment, frames_dir,
+                                    args.dry_run, conn)
+            except Exception as e:
+                r = {"auv_id": auv_id, "segment": segment, "error": str(e),
+                     "skipped": True}
+                print(f"  [04b] ERR {auv_id}/{segment}: {e}")
+            results.append(r)
+
+    # Summary
+    print("\n=== Stage 04b summary ===")
+    upgraded = [r for r in results
+                if r.get("status_before") == "degraded" and r.get("status_after") == "done"]
+    still_degraded = [r for r in results
+                      if r.get("status_after") == "degraded"]
+    skipped = [r for r in results if r.get("skipped")]
+    print(f"Upgraded degraded→done : {len(upgraded)}")
+    for r in upgraded:
+        print(f"  + {r['auv_id']}/{r['segment']} "
+              f"({r['before_bottom_pct']}%→{r['after_bottom_pct']}%, "
+              f"trim head={r['head_removed']} tail={r['tail_removed']})")
+    print(f"Still degraded         : {len(still_degraded)}")
+    print(f"Skipped                : {len(skipped)}")
+    for r in skipped:
+        print(f"  - {r['auv_id']}/{r['segment']}: {r.get('reason', 'unknown')}")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/05_inference.py
+++ b/pipeline/stages/05_inference.py
@@ -0,0 +1,376 @@
+#!/usr/bin/env python3
+"""Stage 05 — Run lingbot-map inference on extracted frames.
+
+Args:
+    --frames-dir <path>   Directory with frame_*.jpg (or parent with AUV subdirs)
+    --worker <auto|.84|.87>   GPU worker selection
+    --mission <name>      Mission name for output paths
+
+Workers:
+  .84: /root/ai-video/lingbot-map/.venv/bin/python demo.py ...
+  .87: /home/floppyrj45/ai-video/lingbot-map/.venv/bin/python demo.py ...
+
+Auto: pick by lowest GPU memory usage (nvidia-smi via SSH).
+
+Flow:
+1. Kill any stale demo.py on worker before starting
+2. rsync frames .83 → worker /root/cosma-frames-tmp/
+3. SSH launch demo.py in background; poll for PLY file; kill viser server once PLY done
+4. Retrieve PLY + NPZ → .83 ~/cosma-pipeline/data/<mission>/ply/<AUV>/<segment>.{ply,npz}
+5. Cleanup worker temp dir
+6. Log to SQLite: duration, GPU peak mem, nb points in PLY
+
+Usage:
+    python3 05_inference.py --frames-dir ~/cosma-pipeline/data/20260505-Lepradet/frames/AUV210/GX019837 --worker auto --mission 20260505-Lepradet
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+import yaml
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from orchestrator.db import init_db, get_conn, upsert_job, record_metric, now_iso
+
+PIPELINE_BASE = Path(os.environ.get("COSMA_PIPELINE_BASE", "/home/cosma/cosma-pipeline"))
+
+def _load_inference_cfg() -> dict:
+    """Load inference params from thresholds.yaml, with sane defaults."""
+    cfg_path = Path(__file__).parent.parent / "config" / "thresholds.yaml"
+    try:
+        data = yaml.safe_load(cfg_path.read_text())
+        return data.get("inference", {})
+    except Exception:
+        return {}
+
+_INF_CFG = _load_inference_cfg()
+
+WORKERS = {
+    ".84": {
+        "host": "192.168.0.84",
+        "user": "root",
+        "ai_dir": "/root/ai-video/lingbot-map",
+        "venv": "/root/ai-video/lingbot-map/.venv/bin/python",
+        "tmp_dir": "/root/cosma-frames-tmp",
+    },
+    ".87": {
+        "host": "192.168.0.87",
+        "user": "floppyrj45",
+        "ai_dir": "/home/floppyrj45/ai-video/lingbot-map",
+        "venv": "/home/floppyrj45/ai-video/lingbot-map/.venv/bin/python",
+        "tmp_dir": "/home/floppyrj45/cosma-frames-tmp",
+    },
+}
+
+
+def get_gpu_mem_used(worker_key: str) -> int:
+    """Return GPU memory used in MB via SSH nvidia-smi. Returns 99999 on error."""
+    w = WORKERS[worker_key]
+    cmd = [
+        "ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=5",
+        f"{w['user']}@{w['host']}",
+        "nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits 2>/dev/null | head -1"
+    ]
+    try:
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
+        return int(r.stdout.strip())
+    except Exception:
+        return 99999
+
+
+def kill_stale_demo_py(worker_key: str) -> None:
+    """Kill any lingering demo.py processes on worker before starting new inference."""
+    w = WORKERS[worker_key]
+    ssh_target = f"{w['user']}@{w['host']}"
+    try:
+        subprocess.run(
+            ["ssh", "-o", "StrictHostKeyChecking=no", "-o", "ConnectTimeout=10",
+             ssh_target, "pkill -9 -f demo.py 2>/dev/null; sleep 1; echo stale_killed"],
+            capture_output=True, text=True, timeout=15,
+        )
+        print(f"  [05] Stale demo.py killed on {worker_key}")
+    except Exception as e:
+        print(f"  [05] Warning: kill_stale failed on {worker_key}: {e}")
+
+
+def pick_worker() -> str:
+    """Auto-select worker with lowest GPU memory usage."""
+    best = None
+    best_mem = 99999
+    for key in WORKERS:
+        mem = get_gpu_mem_used(key)
+        print(f"  [05] Worker {key}: GPU mem={mem}MB")
+        if mem < best_mem:
+            best_mem = mem
+            best = key
+    if best is None:
+        raise RuntimeError("No GPU worker available")
+    print(f"  [05] Selected worker {best}")
+    return best
+
+
+def count_ply_points(ply_path: Path) -> int:
+    """Count vertex count in PLY file header."""
+    try:
+        with open(ply_path, "rb") as f:
+            for _ in range(30):
+                line = f.readline().decode("ascii", errors="ignore").strip()
+                if line.startswith("element vertex"):
+                    return int(line.split()[-1])
+    except Exception:
+        pass
+    return 0
+
+
+def run_inference(frames_dir: Path, worker_key: str, mission_name: str,
+                  auv_id: str, segment: str) -> dict:
+    """Run lingbot-map on one segment. Returns metrics."""
+    w = WORKERS[worker_key]
+    host = w["host"]
+    user = w["user"]
+    ssh_target = f"{user}@{host}"
+    worker_frames = f"{w['tmp_dir']}/{mission_name}/{auv_id}/{segment}"
+    ply_remote = f"{w['tmp_dir']}/{mission_name}/{auv_id}/{segment}.ply"
+    npz_remote = f"{w['tmp_dir']}/{mission_name}/{auv_id}/{segment}.npz"
+
+    out_dir = PIPELINE_BASE / "data" / mission_name / "ply" / auv_id
+    out_dir.mkdir(parents=True, exist_ok=True)
+    out_ply = out_dir / f"{segment}.ply"
+    out_npz = out_dir / f"{segment}.npz"
+
+    if out_ply.exists() and out_ply.stat().st_size > 1000:
+        n_pts = count_ply_points(out_ply)
+        print(f"  [05] {auv_id}/{segment}: cached PLY ({n_pts} pts)")
+        return {"cached": True, "ply": str(out_ply), "n_points": n_pts}
+
+    metrics = {
+        "auv_id": auv_id,
+        "segment": segment,
+        "worker": worker_key,
+        "status": "ok",
+    }
+
+    # Step 0: kill any stale demo.py on worker
+    kill_stale_demo_py(worker_key)
+
+    # Step 1: create remote temp dir + rsync frames
+    print(f"  [05] rsync {frames_dir} → {ssh_target}:{worker_frames}...")
+    subprocess.run(
+        ["ssh", "-o", "StrictHostKeyChecking=no", ssh_target,
+         f"mkdir -p {worker_frames}"],
+        check=True, timeout=15,
+    )
+    r = subprocess.run(
+        ["rsync", "-az", "--delete",
+         str(frames_dir) + "/",
+         f"{ssh_target}:{worker_frames}/"],
+        capture_output=True, text=True, timeout=600,
+    )
+    if r.returncode != 0:
+        metrics["status"] = "error"
+        metrics["error"] = f"rsync failed: {r.stderr[-200:]}"
+        return metrics
+    print(f"  [05] rsync done")
+
+    # Step 2: build demo.py command -- params from thresholds.yaml[inference]
+    checkpoint = f"{w['ai_dir']}/checkpoints/lingbot-map/lingbot-map.pt"
+    inf_mode = _INF_CFG.get("mode", "streaming")
+    conf_thr = _INF_CFG.get("ply_conf_threshold", 1.5)
+    kf_interval = _INF_CFG.get("keyframe_interval", 1)
+    max_frames = _INF_CFG.get("max_frame_num", 1024)
+    use_offload = _INF_CFG.get("offload_to_cpu", False)
+    offload_flag = "--offload_to_cpu" if use_offload else "--no-offload_to_cpu"
+
+    if inf_mode == "windowed":
+        window_size = _INF_CFG.get("window_size", 64)
+        overlap_size = _INF_CFG.get("overlap_size", 16)
+        mode_flags = (
+            f"--mode windowed "
+            f"--window_size {window_size} "
+            f"--overlap_size {overlap_size} "
+        )
+    else:  # streaming (default, validated GX049839_v2 146M pts)
+        mode_flags = (
+            f"--mode streaming "
+            f"--keyframe_interval {kf_interval} "
+            f"--max_frame_num {max_frames} "
+        )
+
+    inf_timeout = int(_INF_CFG.get("inference_timeout_s", 10800))
+
+    # Remote script: launch demo.py in background, poll for PLY, kill viser when done
+    # This avoids the SSH blocking on the viser server that starts after inference
+    remote_script = f"""#!/bin/bash
+set -e
+PLY={ply_remote}
+LOG=/tmp/cosma_demo_{segment}.log
+# Launch demo.py in background
+nohup {w['venv']} {w['ai_dir']}/demo.py \\
+  --model_path {checkpoint} \\
+  --image_folder {worker_frames} \\
+  {mode_flags}--ply_conf_threshold {conf_thr} \\
+  --save_ply  \\
+  --save_poses {npz_remote} \\
+  --use_sdpa {offload_flag} \\
+  >  2>&1 &
+DEMO_PID=
+echo "demo.py PID=" >&2
+# Poll for PLY file (check every 30s)
+WAITED=0
+while [  -lt {inf_timeout} ]; do
+  if [ -f "" ] && [ $(wc -c < "") -gt 100 ]; then
+    sleep 10  # let write finish
+    echo "PLY_DONE size=$(wc -c < )" >&2
+    kill  2>/dev/null || true
+    exit 0
+  fi
+  # Check if process died with error
+  if ! kill -0  2>/dev/null; then
+    echo "Process died early" >&2
+    exit 1
+  fi
+  sleep 30
+  WAITED=30
+done
+echo "TIMEOUT after {inf_timeout}s" >&2
+kill -9  2>/dev/null || true
+exit 2
+"""
+
+    print(f"  [05] Launching inference on {host} (background+poll, timeout={inf_timeout}s)...")
+    t0 = time.time()
+    r = subprocess.run(
+        ["ssh", "-o", "StrictHostKeyChecking=no", ssh_target,
+         "bash -s"],
+        input=remote_script,
+        capture_output=True, text=True, timeout=inf_timeout + 60,
+    )
+    elapsed = time.time() - t0
+    metrics["inference_s"] = round(elapsed, 1)
+
+    if r.returncode != 0:
+        metrics["status"] = "error"
+        metrics["error"] = (r.stdout + r.stderr)[-500:]
+        print(f"  [05] inference error: {metrics['error'][-200:]}")
+        return metrics
+
+    print(f"  [05] Inference done in {elapsed:.1f}s (returncode={r.returncode})")
+
+    metrics["gpu_peak_mb"] = get_gpu_mem_used(worker_key)
+
+    # Step 4: rsync PLY + NPZ back
+    print(f"  [05] Retrieving PLY + NPZ...")
+    for remote, local in [(ply_remote, out_ply), (npz_remote, out_npz)]:
+        r2 = subprocess.run(
+            ["rsync", "-az", f"{ssh_target}:{remote}", str(local)],
+            capture_output=True, text=True, timeout=120,
+        )
+        if r2.returncode != 0:
+            print(f"  [05] Warning: rsync back failed for {remote}: {r2.stderr[-100:]}")
+
+    # Step 5: cleanup worker
+    subprocess.run(
+        ["ssh", "-o", "StrictHostKeyChecking=no", ssh_target,
+         f"rm -rf {worker_frames} {ply_remote} {npz_remote}"],
+        timeout=30,
+    )
+
+    # Count PLY points
+    n_pts = count_ply_points(out_ply) if out_ply.exists() else 0
+    metrics["n_points"] = n_pts
+    metrics["ply"] = str(out_ply)
+    print(f"  [05] PLY: {n_pts} points → {out_ply}")
+
+    return metrics
+
+
+def process_frames_dir(frames_dir: Path, worker_key: str, mission_name: str) -> list[dict]:
+    """Process a directory of frames (single segment or AUV tree)."""
+    direct_frames = list(frames_dir.glob("frame_*.jpg"))
+
+    if direct_frames:
+        parts = frames_dir.parts
+        auv_id = frames_dir.parent.name if len(parts) >= 2 else "UNKNOWN"
+        segment = frames_dir.name
+        return [run_inference(frames_dir, worker_key, mission_name, auv_id, segment)]
+
+    all_metrics = []
+    for auv_dir in sorted(frames_dir.iterdir()):
+        if not auv_dir.is_dir():
+            continue
+        auv_id = auv_dir.name
+        for seg_dir in sorted(auv_dir.iterdir()):
+            if not seg_dir.is_dir():
+                continue
+            frames = list(seg_dir.glob("frame_*.jpg"))
+            if not frames:
+                continue
+            print(f"\n[05] === {auv_id}/{seg_dir.name}: {len(frames)} frames ===")
+            # Guard: min frames required for model (RoPE/attention)
+            min_frames = int(_INF_CFG.get("min_frames_for_inference", 32))
+            if len(frames) < min_frames:
+                print(f"  [05] SKIP {auv_id}/{seg_dir.name}: {len(frames)} frames < {min_frames} min")
+                init_db()
+                with get_conn() as conn_mf:
+                    mr = conn_mf.execute("SELECT id FROM missions WHERE name=?", (mission_name,)).fetchone()
+                    if mr:
+                        upsert_job(conn_mf, mr["id"], auv_id, seg_dir.name, "05_inference",
+                                   status="skipped",
+                                   error_msg=f"frames_too_few={len(frames)}<{min_frames}")
+                continue
+
+            m = run_inference(seg_dir, worker_key, mission_name, auv_id, seg_dir.name)
+            all_metrics.append(m)
+
+            init_db()
+            with get_conn() as conn:
+                mission_row = conn.execute(
+                    "SELECT id FROM missions WHERE name=?", (mission_name,)
+                ).fetchone()
+                if mission_row and not m.get("cached"):
+                    job_id = upsert_job(
+                        conn, mission_row["id"], auv_id, seg_dir.name, "05_inference",
+                        status="done" if m.get("status") == "ok" else m.get("status", "error"),
+                        output_path=m.get("ply", ""),
+                    )
+                    record_metric(conn, job_id, "ply_points", value=m.get("n_points", 0),
+                                  pass_fail="pass" if m.get("n_points", 0) > 100 else "fail")
+                    if "inference_s" in m:
+                        record_metric(conn, job_id, "inference_s", value=m["inference_s"])
+                    if "gpu_peak_mb" in m:
+                        record_metric(conn, job_id, "gpu_peak_mb", value=m["gpu_peak_mb"])
+
+    return all_metrics
+
+
+def main():
+    ap = argparse.ArgumentParser(description="Stage 05 — lingbot-map inference")
+    ap.add_argument("--frames-dir", type=Path, required=True)
+    ap.add_argument("--worker", type=str, default="auto", choices=["auto", ".84", ".87"])
+    ap.add_argument("--mission", type=str, required=True)
+    args = ap.parse_args()
+
+    worker = args.worker
+    if worker == "auto":
+        worker = pick_worker()
+
+    metrics = process_frames_dir(args.frames_dir, worker, args.mission)
+
+    print("\n=== Stage 05 summary ===")
+    total_pts = sum(m.get("n_points", 0) for m in metrics)
+    ok = sum(1 for m in metrics if m.get("status") == "ok" or m.get("cached"))
+    print(f"  Segments OK: {ok}/{len(metrics)}, total PLY points: {total_pts}")
+    for m in metrics:
+        print(f"  {m.get('auv_id','?')}/{m.get('segment','?')}: "
+              f"{m.get('n_points',0)} pts "
+              f"[{m.get('status','cached' if m.get('cached') else '?')}]")
+
+
+if __name__ == "__main__":
+    main()
--- a/pipeline/stages/init.py
+++ b/pipeline/stages/init.py
--- a/pipeline/stages/lib_frame_qc.py
+++ b/pipeline/stages/lib_frame_qc.py
@@ -0,0 +1,83 @@
+"""Frame quality scoring for underwater footage.
+
+For each frame, compute:
+  - laplacian_var: focus/sharpness (cv2.Laplacian variance)
+  - contrast: stddev of grayscale
+  - blue_dominance: mean(B - R), positive = water dominant
+  - mean_r/g/b: per-channel means
+
+Classification (priority order):
+  - mean_r > mean_g + 5 AND mean_r > mean_b + 5 → 'out_of_water'
+  - laplacian_var < 50 AND contrast < 25       → 'turbid_water'
+  - laplacian_var >= 80 AND contrast >= 35
+    AND blue_dominance > -10                   → 'bottom_visible'
+  - else                                       → 'water_no_bottom'
+"""
+from __future__ import annotations
+
+from collections import Counter
+from typing import Iterable
+
+import cv2
+import numpy as np
+
+
+def score_frame(frame_bgr: np.ndarray) -> dict:
+    """Return per-frame QC metrics + class label."""
+    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
+    lap_var = float(cv2.Laplacian(gray, cv2.CV_64F).var())
+    contrast = float(gray.std())
+    b, g, r = cv2.split(frame_bgr)
+    mean_r = float(r.mean())
+    mean_g = float(g.mean())
+    mean_b = float(b.mean())
+    blue_dom = float(mean_b - mean_r)
+
+    if mean_r > mean_g + 5 and mean_r > mean_b + 5:
+        klass = "out_of_water"
+    elif lap_var < 50 and contrast < 25:
+        klass = "turbid_water"
+    elif lap_var >= 80 and contrast >= 35 and blue_dom > -10:
+        klass = "bottom_visible"
+    else:
+        klass = "water_no_bottom"
+
+    return {
+        "laplacian_var": round(lap_var, 2),
+        "contrast": round(contrast, 2),
+        "blue_dominance": round(blue_dom, 2),
+        "mean_r": round(mean_r, 1),
+        "mean_g": round(mean_g, 1),
+        "mean_b": round(mean_b, 1),
+        "class": klass,
+        "score_ok": klass == "bottom_visible",
+    }
+
+
+def score_image_file(path) -> dict | None:
+    """Load image with OpenCV and score it. Returns None on failure."""
+    img = cv2.imread(str(path))
+    if img is None:
+        return None
+    res = score_frame(img)
+    res["file"] = str(path)
+    return res
+
+
+def aggregate(scores: Iterable[dict]) -> dict:
+    """Aggregate a sequence of score_frame() dicts."""
+    scores = list(scores)
+    total = len(scores)
+    counts = Counter(s["class"] for s in scores)
+    bottom = counts.get("bottom_visible", 0)
+    return {
+        "frames_total": total,
+        "frames_bottom_visible": bottom,
+        "frames_out_of_water": counts.get("out_of_water", 0),
+        "frames_turbid": counts.get("turbid_water", 0),
+        "frames_water_no_bottom": counts.get("water_no_bottom", 0),
+        "bottom_visible_pct": round(100.0 * bottom / total, 1) if total else 0.0,
+    }
+
+
+CLASS_ORDER = ("bottom_visible", "water_no_bottom", "turbid_water", "out_of_water")
--- a/pipeline/veille/2026-05-11-2233-iter-1.md
+++ b/pipeline/veille/2026-05-11-2233-iter-1.md
@@ -0,0 +1,14 @@
+# Veille — 2026-05-11 22:33 UTC — iter-1
+
+## ArXiv (signaux forts)
+- **[2605.04672]** AI-Aided Advancements in AUV Navigation — fusion caméra+DVL+IMU IA, pertinent nav AUV
+- **BALTIC benchmark** — cross-domain 3D recon air/eau illumination variable
+- **3D Gaussian Splatting underwater** — spatiotemporal degradation-aware GS scènes turbides
+
+## GitHub
+- **LingBot-Map** (maj 3j) — streaming 20FPS 518×378 10k+ frames drift correction attention paginée — fort signal
+- **DUSt3R** actif, suivi normal
+- MonST3R / VGGT / MoGe : pas de maj 7j
+
+## Action possible
+Tester 3DGS underwater sur frames AUV210 turbides (0% bottom visible) comme alternative à lingbot reconstruction
--- a/pipeline/veille/2026-05-12-0430-iter-2.md
+++ b/pipeline/veille/2026-05-12-0430-iter-2.md
@@ -0,0 +1,23 @@
+# Veille COSMA reconstruction — iter-2 — 2026-05-12 04:30 UTC
+
+## arxiv underwater 3D (7 derniers jours)
+- UW-3DGS: Underwater 3D Reconstruction, Physics-Aware Gaussian Splatting (arxiv 2508.06169)
+- Visual enhancement + 3D representation underwater: review (arxiv 2505.01869)
+
+## arxiv AUV SLAM / point cloud
+- VISO: Robust Underwater Visual-Inertial-Sonar SLAM (arxiv 2601.01144) — VIS+sonar, fort intérêt pour pipeline USBL
+- RUSSO: Underwater SLAM stéréo+sonar+IMU (arxiv 2503.01434)
+- VIMS: Visual-Inertial-Magnetic-Sonar SLAM (arxiv 2506.15126)
+
+## Repos GitHub actifs
+- naver/dust3r (7k★): actif, base pipeline lingbot-map
+- Junyi42/monst3r (ICLR 2025): géométrie vidéo dynamique
+- facebookresearch/vggt (CVPR 2025 Best Paper): reconstruction per-frame
+- CUT3R: Continuous 3D Perception, mise à jour mars 2026
+
+## HuggingFace
+- Video-Depth-Anything-Small: depth video temps-réel
+- StereoAdapter: adaptation profondeur stéréo sous-marine
+
+## Signal fort
+VISO (arxiv 2601.01144): pipeline USBL+caméra+IMU pour AUV, pourrait remplacer pure-camera pose estimation dans stage 06_align.
--- a/pipeline/veille/2026-05-12-1650-iter-4.md
+++ b/pipeline/veille/2026-05-12-1650-iter-4.md
@@ -0,0 +1,26 @@
+# Veille iter-4 — 2026-05-12 16:50 UTC
+
+## Top signaux (8-9/10)
+
+- **ReefMapGS** arxiv.org/abs/2604.11992 — SLAM+3DGS 700m AUV, COLMAP-free, directement applicable COSMA (9/10)
+- **OceanSplat** (2026) — 3D Gaussian Splatting milieu turbide + trinocular consistency (9/10)
+- **BIND-USBL** arxiv.org/abs/2604.11861 — fusion IMU+USBL hétérogène ASV-AUV, delayed fusion = pattern réutilisable stage 06_align (9/10)
+- **LingBot-Map update** (27 avril) — keyframe_interval fix + long-video demo — update recommandé (8/10)
+- **PAS3R** HuggingFace — Pose-Adaptive Streaming 3D, long video = streaming AUV (8/10)
+- **AI-Aided AUV Navigation** arxiv.org/abs/2605.04672 — fusion INS+DVL+cam deep learning (8/10)
+
+## Signaux modérés (7/10)
+
+- Aquatic Neuromorphic Optical Flow arxiv.org/abs/2605.07653 — event cam AUV turbide
+- WaterSplat-SLAM RAL 2026 — SLAM monoculaire sous-marin photoréaliste
+
+## Repos actifs
+
+- lingbot-map (keyframe fix avril), awesome-dust3r (ecosystem DUSt3R/VGGT/CUT3R)
+- Matisse Ifremer — datasets flotte française
+
+## Recommandations
+
+1. **BIND-USBL** : lire pour stage 06_align (pattern fusion USBL+IMU déjà dispo)
+2. **LingBot-Map update** : Already up to date. sur .84/.87 avant prochaine iter
+3. **ReefMapGS** : évaluer comme alternative stage 06_align si PR #9/#12 mergés
--- a/pipeline/veille/2026-05-12-2246-iter-5.md
+++ b/pipeline/veille/2026-05-12-2246-iter-5.md
@@ -0,0 +1,26 @@
+# Veille Iter-5 — 2026-05-12 22:46 UTC
+
+## Arxiv / Papers
+
+| # | Titre | Signal | Score |
+|---|-------|--------|-------|
+| 1 | ReefMapGS | SLAM multimodal + Gaussian Splatting pour grandes scènes sous-marines avec fermeture de boucle | 9/10 |
+| 2 | Sonar-MASt3R | Fusion optico-acoustique temps réel pour environnements turbides — intéressant pour milieu turbide AUV | 8/10 |
+| 3 | WaterSplat-SLAM | SLAM monoculaire photoréaliste underwater, moindre dépendance stéréo | 8/10 |
+| 4 | Spatiotemporal Degradation-Aware 3DGS | Reconstruction scènes sous-marines avec dégradation temporelle (particules, courant) | 8/10 |
+| 5 | BALTIC Benchmark | Benchmark 3D reconstruction air/underwater avec variations d'illumination, utile pour QC comparaison | 7/10 |
+| 6 | Lost at Sea (Notre Dame) | AUV utilisant 3DGS pour navigation autonome et reconnaissance environnement | 7/10 |
+
+## GitHub / HuggingFace
+
+| Repo | Signal |
+|------|--------|
+| LingBot-Map | Commits récents (4 jours) — à tracker pour keyframe fixes |
+| dust3r/mast3r | Actifs, pas de release majeure dernière semaine |
+| Pixal3D (SIGGRAPH 2026) | 3D pixel-alignée, potentiellement utile pour poses denses |
+
+## Recommandation prochaine iteration
+
+- **ReefMapGS** : évaluer pour remplacement LingBot-Map sur grands segments (15m+)
+- **Sonar-MASt3R** : pertinent si Kogger SBP intégré dans pipeline — stage 06 USBL+cam pourrait utiliser composante acoustique
+- **BALTIC Benchmark** : utiliser pour QC comparatif sur segments AUV210 (turbide)
--- a/pipeline/veille/2026-05-13-1043-iter-7.md
+++ b/pipeline/veille/2026-05-13-1043-iter-7.md
@@ -0,0 +1,21 @@
+# Veille iter-7 — 2026-05-13 10:43 UTC
+
+## Papers / Signaux (6 total)
+
+| # | Titre | Ref | Score | Pertinence COSMA |
+|---|-------|-----|-------|-----------------|
+| 1 | Aquatic Neuromorphic Optical Flow | arXiv 2605.07653 (5j) | 9/10 | Optique turbide robuste, temps-réel, léger → stage06_align |
+| 2 | MAGS-SLAM: Multi-Agent 3DGS SLAM | arXiv 2605.10760 (2j) | 8/10 | SLAM 3DGS multi-robot, cohérence photométrique → futur multi-AUV |
+| 3 | AI Platform AUV 3DGS (Notre-Dame) | engineering.nd.edu (5j) | 9/10 | 3DGS ellipsoïdes flous underwater, navigation AUV pré-chargée |
+| 4 | MV-DUSt3R+ | GitHub facebookresearch (7j) | 8/10 | DUSt3R v2 rapide (2s), baseline comparaison stage05 |
+| 5 | MonST3R | GitHub Junyi42 (ICLR 2025) | 7/10 | Géométrie robuste motion/occlusion → transition segments |
+| 6 | LingBot-Map | GitHub robbyant (5j) | 9/10 | Màj streaming, vérifier diff vs version .84/.87 installée |
+
+## Repos actifs (7j)
+- **lingbot-map** (robbyant) : dernière màj 5j — comparer avec version installée .84/.87
+- **dust3r / monst3r** : mises à jour README et poids — rien d'urgent
+
+## Recommandations prochaines
+1. Évaluer Aquatic Neuromorphic Optical Flow pour stage06_align (turbide)
+2. Benchmarker 3DGS (MAGS-SLAM ou Notre-Dame) sur 1 segment AUV210
+3. Mettre à jour lingbot-map .84/.87 si diff significatif
--- a/pipeline/veille/2026-05-13-1643-iter-8.md
+++ b/pipeline/veille/2026-05-13-1643-iter-8.md
@@ -0,0 +1,17 @@
+# Veille — 2026-05-13 16:43 UTC — Iter 8
+
+## Signaux forts
+
+| Titre | Signal | Raison |
+|-------|--------|--------|
+| LingBot-Map accelerated (2026-04-27 update) | 10/10 | Streaming 3D foundation model, maj 16j, optimisations directement applicables pipeline COSMA |
+| ND AI Platform : 3DGS + Bayesian uncertainty | 9/10 | Gaussian splatting + quantification d'incertitude — utile pour USBL fusion et scoring confiance PLY |
+| GPU COLMAP+3DGS Deploy Guide | 7/10 | COLMAP SfM → 3DGS sur GPU, alternative si lingbot-map insuffisant segments dégradés |
+| Correlator3D 3DGS integration | 6/10 | Photogrammetrie + Gaussian splatting pour meshes naturels, contexte fond marin |
+
+## Pas de nouveaux hits 7j
+- dust3r, monst3r, vggt, mee-deepreefmap : aucune mise à jour récente
+
+## Recommandations
+- Tracker la maj LingBot-Map 2026-04-27 : vérifier si les optimisations vitesse sont dans le checkout .84
+- Évaluer ND 3DGS Bayesian pour étape post-PLY : scoring incertitude peut améliorer stitch ICP
--- a/scripts/coverage_swath.py
+++ b/scripts/coverage_swath.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+"""Coverage swath QC plot — project each frame footprint on ground.
+
+Usage:
+  python3 coverage_swath.py --traj-csv /tmp/dvl_loopclosed_GX039839.csv \
+    --frames-dir /home/cosma/...AUV210/GX039839 \
+    --altitude 1.5 --fov-h 122 --fov-v 80 --out /tmp/coverage_GX039839.png
+"""
+import argparse, csv, math
+from pathlib import Path
+import numpy as np
+import cv2
+
+def compute_qc(frame_path):
+    """R<G-5 && R<B-5 underwater test, return ratio of bottom_visible-ish pixels."""
+    img = cv2.imread(str(frame_path), cv2.IMREAD_COLOR)
+    if img is None: return 0.0
+    b, g, r = cv2.split(img)
+    mask = (r < g.astype(int) - 5) & (r < b.astype(int) - 5)
+    # contrast: stddev of gray channel
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    if gray.std() < 20: return 0.0  # turbid / blurry
+    return float(mask.mean())
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--traj-csv', required=True)
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--altitude', type=float, default=1.5)
+    ap.add_argument('--fov-h', type=float, default=122.0)
+    ap.add_argument('--fov-v', type=float, default=80.0)
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--x-col', default='east_m_corr')
+    ap.add_argument('--y-col', default='north_m_corr')
+    ap.add_argument('--label', default='segment')
+    ap.add_argument('--heading-csv', default=None, help='separate CSV with heading_deg per frame')
+    ap.add_argument('--sample-every', type=int, default=10, help='draw every N frames')
+    args = ap.parse_args()
+    
+    # Load trajectory
+    rows = list(csv.DictReader(open(args.traj_csv)))
+    # autodetect col names
+    cols = rows[0].keys()
+    if args.x_col not in cols: args.x_col = 'east_m' if 'east_m' in cols else 'x'
+    if args.y_col not in cols: args.y_col = 'north_m' if 'north_m' in cols else 'y'
+    print(f'[cov] {len(rows)} rows, x_col={args.x_col} y_col={args.y_col}', flush=True)
+    
+    # Load heading from a heading CSV (or assume 0)
+    headings = {}
+    if args.heading_csv:
+        for r in csv.DictReader(open(args.heading_csv)):
+            headings[int(r['frame_idx'])] = float(r['heading_deg'])
+        print(f'[cov] {len(headings)} headings loaded', flush=True)
+    elif 'heading_deg' in cols:
+        for r in rows:
+            headings[int(r['frame_idx'])] = float(r['heading_deg'])
+    
+    frames_dir = Path(args.frames_dir)
+    # Footprint dimensions at altitude
+    half_w = args.altitude * math.tan(math.radians(args.fov_h/2))
+    half_h = args.altitude * math.tan(math.radians(args.fov_v/2))
+    print(f'[cov] footprint at alt={args.altitude}m: {2*half_w:.2f}m × {2*half_h:.2f}m', flush=True)
+    
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+    from matplotlib.patches import Rectangle
+    from matplotlib.transforms import Affine2D
+    from matplotlib.collections import PatchCollection
+    
+    fig, axes = plt.subplots(1, 2, figsize=(20, 10))
+    ax_cov, ax_traj = axes[0], axes[1]
+    
+    # Collect rectangles + colors
+    rects = []
+    colors = []
+    qc_values = []
+    xs = []; ys = []
+    
+    for r in rows[::args.sample_every]:
+        fi = int(r['frame_idx'])
+        x = float(r[args.x_col])
+        y = float(r[args.y_col])
+        xs.append(x); ys.append(y)
+        hdg = headings.get(fi, 0.0)
+        # QC
+        fpath = frames_dir / f'frame_{fi+1:05d}.jpg'
+        if fpath.exists():
+            qc = compute_qc(fpath)
+        else:
+            qc = 0.0
+        qc_values.append(qc)
+        
+        # Build rotated rectangle
+        # rectangle in body frame: x = +/- half_w (right), y = +/- half_h (forward)
+        # but in world: rotated by heading
+        from matplotlib.patches import Polygon
+        corners_body = np.array([
+            [-half_w, -half_h],
+            [+half_w, -half_h],
+            [+half_w, +half_h],
+            [-half_w, +half_h],
+        ])
+        # heading rotation (clockwise from north, so for math invert)
+        th = math.radians(hdg)
+        R = np.array([[math.cos(th), math.sin(th)],
+                      [-math.sin(th), math.cos(th)]])
+        corners_world = corners_body @ R.T + np.array([x, y])
+        rects.append(corners_world)
+        colors.append(qc)
+    
+    # Plot coverage
+    from matplotlib.patches import Polygon
+    for corners, qc in zip(rects, colors):
+        poly = Polygon(corners, alpha=0.10, edgecolor='black', linewidth=0.1,
+                       facecolor=plt.cm.RdYlGn(qc * 1.5 if qc < 0.7 else 1.0))
+        ax_cov.add_patch(poly)
+    
+    ax_cov.plot(xs, ys, '-k', linewidth=0.5, alpha=0.5)
+    ax_cov.plot(xs[0], ys[0], 'go', markersize=12, label='start')
+    ax_cov.plot(xs[-1], ys[-1], 'r^', markersize=12, label='end')
+    ax_cov.set_xlabel('East (m)'); ax_cov.set_ylabel('North (m)')
+    ax_cov.set_title(f'Coverage swath — {args.label}\n{len(rects)} footprints @ alt={args.altitude}m FOV {args.fov_h}°×{args.fov_v}°\n(green=bottom visible, red=hors-eau/turbid)')
+    ax_cov.set_aspect('equal'); ax_cov.legend(); ax_cov.grid(True, alpha=0.3)
+    
+    # Trajectory only with QC color
+    sc = ax_traj.scatter(xs, ys, c=colors, cmap='RdYlGn', s=12, vmin=0, vmax=0.7)
+    plt.colorbar(sc, ax=ax_traj, label='bottom_visible ratio')
+    ax_traj.plot(xs[0], ys[0], 'go', markersize=12)
+    ax_traj.plot(xs[-1], ys[-1], 'r^', markersize=12)
+    ax_traj.set_xlabel('East (m)'); ax_traj.set_ylabel('North (m)')
+    ax_traj.set_title(f'Trajectoire colorée par QC ({len(xs)} points)'); ax_traj.set_aspect('equal'); ax_traj.grid(True, alpha=0.3)
+    
+    plt.suptitle(f'Acquisition QC swath — {args.label}', fontsize=14)
+    plt.tight_layout()
+    plt.savefig(args.out, dpi=120, bbox_inches='tight')
+    print(f'[plot] {args.out}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/dvl_focal_sweep.py
+++ b/scripts/dvl_focal_sweep.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Test multiple focal lengths on DVL and compare trajectory drift."""
+import subprocess, csv, sys, math
+from pathlib import Path
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+FRAMES = '/home/cosma/cosma-pipeline/data/20260505-Lepradet/frames/AUV210/GX039839'
+START = '2026-05-05T08:33:41'
+LABEL = 'GX039839'
+SCRIPT = '/home/cosma/cosma-qc/scripts/dvl_optical_full.py'
+
+# focal in px equivalent to W/2 / tan(fov/2). W=518
+# fov 100°→f=217, 110°→f=185, 122°→f=143, 130°→f=121, 140°→f=94, 150°→f=70
+# But focal in px doesn't directly map to FOV unless we use that conversion. We pass --fov-deg.
+fovs = [90, 100, 110, 122, 135, 150]
+
+results = []
+for fov in fovs:
+    out_csv = f'/tmp/sweep_fov{fov}.csv'
+    print(f'[sweep] fov={fov}', flush=True)
+    r = subprocess.run(['python3', SCRIPT, '--frames-dir', FRAMES, '--altitude', '1.5',
+                        '--fov-deg', str(fov), '--fps', '1.0', '--start-iso', START,
+                        '--label', LABEL, '--out', out_csv], capture_output=True, text=True, timeout=600)
+    if r.returncode != 0:
+        print(f'  FAIL: {r.stderr[-300:]}', flush=True)
+        continue
+    # parse last position + drift metrics
+    rows = list(csv.DictReader(open(out_csv)))
+    e = [float(r['east_m']) for r in rows]
+    n = [float(r['north_m']) for r in rows]
+    h = [float(r['heading_deg']) for r in rows]
+    end_x, end_y = e[-1], n[-1]
+    end_dist = math.sqrt(end_x**2 + end_y**2)
+    path_len = sum(math.sqrt((e[i]-e[i-1])**2 + (n[i]-n[i-1])**2) for i in range(1, len(e)))
+    bbox = (max(e)-min(e), max(n)-min(n))
+    results.append({'fov': fov, 'csv': out_csv, 'end_x': end_x, 'end_y': end_y, 
+                    'end_dist': end_dist, 'path_len': path_len, 'bbox': bbox, 'rows': rows,
+                    'e_arr': e, 'n_arr': n, 'h_arr': h})
+    print(f'  end=({end_x:.1f},{end_y:.1f}) dist={end_dist:.1f}m path={path_len:.1f}m bbox={bbox}', flush=True)
+
+# Plot all trajectories
+fig, axes = plt.subplots(2, 3, figsize=(18, 12))
+axes = axes.flatten()
+for ax, res in zip(axes, results):
+    ax.plot(res['e_arr'], res['n_arr'], '-b', linewidth=0.8)
+    ax.plot(res['e_arr'][0], res['n_arr'][0], 'go', markersize=8)
+    ax.plot(res['e_arr'][-1], res['n_arr'][-1], 'r^', markersize=8)
+    ax.set_xlabel('East (m)'); ax.set_ylabel('North (m)')
+    ax.set_title(f'FOV={res["fov"]}° bbox=({res["bbox"][0]:.0f}×{res["bbox"][1]:.0f})m\nend_dist={res["end_dist"]:.1f}m path={res["path_len"]:.0f}m')
+    ax.set_aspect('equal'); ax.grid(True, alpha=0.3)
+plt.suptitle(f'DVL focal sweep — {LABEL} (assume closed-loop survey → smaller end_dist=better)')
+plt.tight_layout()
+plt.savefig('/tmp/sweep_focal.png', dpi=110, bbox_inches='tight')
+print('[plot] /tmp/sweep_focal.png', flush=True)
+
+# Summary
+print('\n=== Summary ===')
+for r in sorted(results, key=lambda x: x['end_dist']):
+    print(f"FOV={r['fov']}°: end_dist={r['end_dist']:.1f}m  path={r['path_len']:.0f}m  bbox={r['bbox'][0]:.0f}×{r['bbox'][1]:.0f}m")
--- a/scripts/dvl_optical.py
+++ b/scripts/dvl_optical.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+"""Optical DVL — mean optical flow per frame → 2D ground velocity integration.
+
+Assumes downward-looking camera at constant altitude above ground.
+Convert pixel flow to metric using altitude / focal_length.
+
+Pipeline:
+  1. Dense Farneback flow between consecutive frames
+  2. Median flow vector (px) → robust against outliers
+  3. v_m = flow_px * altitude_m / focal_px  (instant velocity in cam plane)
+  4. Integrate → trajectory (cam-frame XY)
+  5. Optional: apply IMU heading rotation per frame for body-frame correction
+
+Usage:
+  python3 dvl_optical.py --frames-dir <dir> --altitude 1.5 --fps 1.0 \
+    --start-iso 2026-05-05T08:33:41 --label GX039839 \
+    --out /tmp/dvl.csv --plot /tmp/dvl.png [--ref-csv /tmp/GX039839_camera.csv]
+"""
+import argparse, csv, math, sys
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import cv2
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--altitude', type=float, default=1.5, help='Camera height above bottom (m)')
+    ap.add_argument('--fov-deg', type=float, default=122.0, help='GoPro horizontal FOV')
+    ap.add_argument('--fps', type=float, default=1.0)
+    ap.add_argument('--start-iso', default='2026-05-05T00:00:00')
+    ap.add_argument('--label', default='segment')
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--ref-csv', default=None)
+    ap.add_argument('--method', choices=['farneback','lk'], default='farneback')
+    args = ap.parse_args()
+    
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    print(f'[dvl] {len(frames)} frames', flush=True)
+    
+    W, H = 518, 294
+    f = (W/2) / math.tan(math.radians(args.fov_deg/2))
+    # scale factor : 1 px flow at altitude_m = (altitude_m / focal_px) meters
+    px_to_m = args.altitude / f
+    print(f'[dvl] focal_px={f:.1f} altitude={args.altitude}m -> px_to_m={px_to_m:.5f}', flush=True)
+    
+    t0 = datetime.fromisoformat(args.start_iso).timestamp()
+    rows = []
+    rows.append({'frame_idx': 0, 'ts_s': t0, 'flow_x_px': 0, 'flow_y_px': 0, 'speed_mps': 0, 'x_m': 0, 'y_m': 0})
+    
+    prev = cv2.imread(str(frames[0]), cv2.IMREAD_GRAYSCALE)
+    x_cum, y_cum = 0.0, 0.0
+    
+    for i in range(1, len(frames)):
+        curr = cv2.imread(str(frames[i]), cv2.IMREAD_GRAYSCALE)
+        if curr is None: continue
+        
+        if args.method == 'farneback':
+            flow = cv2.calcOpticalFlowFarneback(prev, curr, None, 0.5, 3, 21, 3, 5, 1.2, 0)
+            fx = np.median(flow[..., 0])
+            fy = np.median(flow[..., 1])
+        else:  # lk on grid
+            h, w = prev.shape
+            pts = np.array([[x, y] for y in range(20, h-20, 30) for x in range(20, w-20, 30)], dtype=np.float32).reshape(-1, 1, 2)
+            curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev, curr, pts, None, winSize=(21,21))
+            good = status.flatten() == 1
+            if good.sum() < 10:
+                fx = fy = 0
+            else:
+                d = (curr_pts - pts)[good].reshape(-1, 2)
+                fx = np.median(d[:, 0]); fy = np.median(d[:, 1])
+        
+        # Convert px/frame -> m/frame
+        dx_m = fx * px_to_m
+        dy_m = fy * px_to_m
+        # AUV motion is OPPOSITE to optical flow direction (camera moves opposite to apparent ground motion)
+        # If ground appears to move +x in image, AUV moves -x in world
+        x_cum -= dx_m
+        y_cum -= dy_m
+        speed_mps = math.sqrt(dx_m**2 + dy_m**2) * args.fps
+        
+        rows.append({'frame_idx': i, 'ts_s': t0 + i/args.fps, 'flow_x_px': float(fx), 'flow_y_px': float(fy),
+                     'speed_mps': speed_mps, 'x_m': x_cum, 'y_m': y_cum})
+        prev = curr
+        
+        if i % 100 == 0:
+            print(f'[dvl] {i}/{len(frames)} flow=({fx:.2f},{fy:.2f}) speed={speed_mps:.3f}m/s pos=({x_cum:.2f},{y_cum:.2f})', flush=True)
+    
+    print(f'[dvl] done. Final position: ({x_cum:.2f}, {y_cum:.2f}) m', flush=True)
+    
+    with open(args.out, 'w', newline='') as f:
+        w = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
+        w.writeheader(); w.writerows(rows)
+    print(f'[out] {args.out}', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_speed, ax_flow, ax_cmp = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        x = [r['x_m'] for r in rows]; y = [r['y_m'] for r in rows]
+        ax_xy.plot(x, y, '-b', linewidth=1.2)
+        ax_xy.plot(x[0], y[0], 'go', markersize=10, label='start')
+        ax_xy.plot(x[-1], y[-1], 'r^', markersize=10, label='end')
+        ax_xy.set_xlabel('X (m)'); ax_xy.set_ylabel('Y (m)'); ax_xy.set_title(f'DVL trajectory (altitude={args.altitude}m)')
+        ax_xy.set_aspect('equal'); ax_xy.legend(); ax_xy.grid(True, alpha=0.3)
+        
+        speeds = [r['speed_mps'] for r in rows]
+        ax_speed.plot(range(len(rows)), speeds, '-r', linewidth=0.8)
+        ax_speed.set_xlabel('Frame'); ax_speed.set_ylabel('Speed (m/s)'); ax_speed.set_title('Speed over time'); ax_speed.grid(True, alpha=0.3)
+        
+        fx_arr = [r['flow_x_px'] for r in rows]; fy_arr = [r['flow_y_px'] for r in rows]
+        ax_flow.plot(fx_arr, label='flow_x px', alpha=0.6)
+        ax_flow.plot(fy_arr, label='flow_y px', alpha=0.6)
+        ax_flow.set_xlabel('Frame'); ax_flow.set_ylabel('Median flow (px)'); ax_flow.set_title('Median optical flow'); ax_flow.legend(); ax_flow.grid(True, alpha=0.3)
+        
+        # comparison with reference
+        if args.ref_csv:
+            try:
+                with open(args.ref_csv) as ff:
+                    refrows = [r for r in csv.DictReader(ff) if r.get('segment','')==args.label or r.get('label','')==args.label]
+                rx = [float(r['x']) for r in refrows]
+                ry = [float(r['y']) for r in refrows]
+                ax_cmp.plot(x, y, '-b', linewidth=1.2, label='DVL optical', alpha=0.7)
+                ax_cmp.plot(rx, ry, '-r', linewidth=1.2, label='lingbot', alpha=0.7)
+                ax_cmp.plot(x[0], y[0], 'go', markersize=8)
+                ax_cmp.set_xlabel('X (m)'); ax_cmp.set_ylabel('Y (m)'); ax_cmp.set_title('DVL vs Lingbot (same scale, x/y)'); ax_cmp.set_aspect('equal')
+                ax_cmp.legend(); ax_cmp.grid(True, alpha=0.3)
+            except Exception as e:
+                print(f'[plot] ref fail: {e}', flush=True)
+        else:
+            ax_cmp.set_title('(no reference)')
+        
+        plt.suptitle(f'Optical DVL — {args.label} ({args.method.upper()} flow, altitude {args.altitude}m)')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+
+if __name__ == '__main__': main()
--- a/scripts/dvl_optical_full.py
+++ b/scripts/dvl_optical_full.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python3
+"""Optical DVL with rotation+scale derived from optical flow ONLY (no IMU).
+Per frame: track features (KLT), fit similarity transform (tx, ty, theta, scale),
+extract metric translation + heading delta, integrate in world frame.
+"""
+import argparse, csv, math
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import cv2
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--altitude', type=float, default=1.5)
+    ap.add_argument('--fov-deg', type=float, default=122.0)
+    ap.add_argument('--fps', type=float, default=1.0)
+    ap.add_argument('--start-iso', default='2026-05-05T00:00:00')
+    ap.add_argument('--label', default='segment')
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--ref-csv', default=None)
+    ap.add_argument('--init-heading-deg', type=float, default=0.0)
+    args = ap.parse_args()
+
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    print(f'[dvl] {len(frames)} frames', flush=True)
+    
+    W, H = 518, 294
+    f = (W/2) / math.tan(math.radians(args.fov_deg/2))
+    px_to_m = args.altitude / f
+    print(f'[dvl] focal_px={f:.1f} px_to_m={px_to_m:.5f}', flush=True)
+    
+    t0 = datetime.fromisoformat(args.start_iso).timestamp()
+    heading = args.init_heading_deg
+    east_cum, north_cum = 0.0, 0.0
+    
+    rows = []
+    rows.append({'frame_idx':0,'ts_s':t0,'heading_deg':heading,'d_theta_deg':0,'scale':1.0,
+                 'dx_cam_px':0,'dy_cam_px':0,'east_m':0,'north_m':0,'inliers':0})
+    
+    prev_gray = cv2.imread(str(frames[0]), cv2.IMREAD_GRAYSCALE)
+    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=1000, qualityLevel=0.01, minDistance=7, blockSize=7)
+    
+    for i in range(1, len(frames)):
+        curr_gray = cv2.imread(str(frames[i]), cv2.IMREAD_GRAYSCALE)
+        if curr_gray is None: continue
+        
+        if prev_pts is None or len(prev_pts) < 100:
+            prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=1000, qualityLevel=0.01, minDistance=7, blockSize=7)
+        
+        curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None, winSize=(21,21), maxLevel=3)
+        
+        good_prev = prev_pts[status.flatten()==1]
+        good_curr = curr_pts[status.flatten()==1]
+        n_tracked = len(good_prev)
+        
+        if n_tracked < 30:
+            # tracking lost - keep last heading + skip motion
+            rows.append({'frame_idx':i,'ts_s':t0+i/args.fps,'heading_deg':heading,'d_theta_deg':0,'scale':1.0,
+                         'dx_cam_px':0,'dy_cam_px':0,'east_m':east_cum,'north_m':north_cum,'inliers':0})
+            prev_gray = curr_gray
+            prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=1000, qualityLevel=0.01, minDistance=7, blockSize=7)
+            continue
+        
+        # Fit similarity 2D (translation + rotation + scale)
+        M, inliers = cv2.estimateAffinePartial2D(good_prev.reshape(-1,2), good_curr.reshape(-1,2), method=cv2.RANSAC, ransacReprojThreshold=2.0)
+        if M is None:
+            rows.append({'frame_idx':i,'ts_s':t0+i/args.fps,'heading_deg':heading,'d_theta_deg':0,'scale':1.0,
+                         'dx_cam_px':0,'dy_cam_px':0,'east_m':east_cum,'north_m':north_cum,'inliers':0})
+            prev_gray = curr_gray
+            prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=1000, qualityLevel=0.01, minDistance=7, blockSize=7)
+            continue
+        
+        # M = [[s*cos(theta), -s*sin(theta), tx], [s*sin(theta), s*cos(theta), ty]]
+        # Extract scale, rotation, translation
+        a, b, tx = M[0]
+        c, d, ty = M[1]
+        s = math.sqrt(a*a + b*b)
+        theta = math.atan2(c, a)  # rotation angle (in image coords)
+        n_inliers = int(inliers.sum()) if inliers is not None else 0
+        
+        # AUV motion is OPPOSITE to apparent ground motion
+        dx_world_cam = -tx * px_to_m  # AUV moved -tx in cam-X
+        dy_world_cam = -ty * px_to_m  # AUV moved -ty in cam-Y
+        # Heading delta = -theta (if features rotate clockwise in image, AUV yawed counter-clockwise from above)
+        d_theta_deg = -math.degrees(theta)
+        heading += d_theta_deg
+        heading %= 360
+        
+        # Rotate cam-frame motion (dx,dy) by current world heading
+        hdg_rad = math.radians(heading)
+        # body forward = +dy_cam (if cam Y_image = AUV forward; if down-facing cam mounted with image up = AUV forward)
+        body_forward = dy_world_cam
+        body_right = dx_world_cam
+        de = body_forward * math.sin(hdg_rad) + body_right * math.cos(hdg_rad)
+        dn = body_forward * math.cos(hdg_rad) - body_right * math.sin(hdg_rad)
+        east_cum += de
+        north_cum += dn
+        
+        rows.append({'frame_idx':i,'ts_s':t0+i/args.fps,'heading_deg':heading,'d_theta_deg':d_theta_deg,'scale':s,
+                     'dx_cam_px':tx,'dy_cam_px':ty,'east_m':east_cum,'north_m':north_cum,'inliers':n_inliers})
+        
+        prev_gray = curr_gray
+        # Refresh features periodically
+        prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=1000, qualityLevel=0.01, minDistance=7, blockSize=7)
+        
+        if i % 100 == 0:
+            print(f'[dvl] {i}/{len(frames)} tracked={n_tracked} inl={n_inliers} d_th={d_theta_deg:+.2f}° hdg={heading:.0f}° s={s:.3f} pos=({east_cum:.2f},{north_cum:.2f})', flush=True)
+    
+    print(f'[dvl] done. Final ENU: ({east_cum:.2f}, {north_cum:.2f}) m. Final heading {heading:.0f}°', flush=True)
+    
+    with open(args.out, 'w', newline='') as ff:
+        w = csv.DictWriter(ff, fieldnames=list(rows[0].keys()))
+        w.writeheader(); w.writerows(rows)
+    print(f'[out] {args.out}', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_hdg, ax_speed, ax_cmp = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        e = [r['east_m'] for r in rows]; n = [r['north_m'] for r in rows]
+        ax_xy.plot(e, n, '-b', linewidth=1.2)
+        ax_xy.plot(e[0], n[0], 'go', markersize=10, label='start')
+        ax_xy.plot(e[-1], n[-1], 'r^', markersize=10, label='end')
+        ax_xy.set_xlabel('East (m)'); ax_xy.set_ylabel('North (m)'); ax_xy.set_title('DVL trajectory (rotation from optical flow)')
+        ax_xy.set_aspect('equal'); ax_xy.legend(); ax_xy.grid(True, alpha=0.3)
+        
+        hdgs = [r['heading_deg'] for r in rows]
+        ax_hdg.plot(range(len(rows)), hdgs, '-c'); ax_hdg.set_xlabel('Frame'); ax_hdg.set_ylabel('Heading (deg)'); ax_hdg.set_title('Heading (integrated from optical flow rotation)'); ax_hdg.grid(True, alpha=0.3)
+        
+        scales = [r['scale'] for r in rows]
+        ax_speed.plot(range(len(rows)), scales, color='orange'); ax_speed.set_xlabel('Frame'); ax_speed.set_ylabel('Scale (1=no zoom)'); ax_speed.set_title('Scale per frame (>1 = zoom in = down)'); ax_speed.axhline(1.0, color='k', alpha=0.3); ax_speed.grid(True, alpha=0.3)
+        
+        if args.ref_csv:
+            try:
+                with open(args.ref_csv) as fff:
+                    refrows = [r for r in csv.DictReader(fff) if r.get('segment','')==args.label or r.get('label','')==args.label]
+                rx = [float(r['x']) for r in refrows]
+                ry = [float(r['y']) for r in refrows]
+                ax_cmp.plot(e, n, '-b', linewidth=1.2, label='DVL optical (rotation included)', alpha=0.7)
+                ax_cmp.plot(rx, ry, '-r', linewidth=1.2, label='lingbot', alpha=0.7)
+                ax_cmp.set_xlabel('East'); ax_cmp.set_ylabel('North'); ax_cmp.set_title('Comparison')
+                ax_cmp.set_aspect('equal'); ax_cmp.legend(); ax_cmp.grid(True, alpha=0.3)
+            except Exception as e: print(f'[ref] {e}', flush=True)
+        else:
+            ax_cmp.set_title('(no reference)')
+        
+        plt.suptitle(f'DVL optical (rotation+scale from cv2.estimateAffinePartial2D) — {args.label}')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/dvl_optical_imu.py
+++ b/scripts/dvl_optical_imu.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+"""Optical DVL + IMU heading correction.
+Same as dvl_optical.py but rotates cam-frame flow to world frame using compass_hdg.
+
+Usage:
+  python3 dvl_optical_imu.py --frames-dir <dir> --bag-dir <auv_bags_dir> \
+      --altitude 1.5 --fps 1.0 --start-iso ... --label ... \
+      --out csv --plot png [--ref-csv ...]
+"""
+import argparse, csv, math, sys
+from pathlib import Path
+from datetime import datetime
+import numpy as np
+import cv2
+from rosbags.highlevel import AnyReader
+
+def load_heading(bag_dir, t_start, t_end):
+    bags = sorted(Path(bag_dir).glob('*.mcap'))
+    # filter empty
+    bags = [b for b in bags if b.stat().st_size > 1000]
+    headings = []  # list of (ts_s, heading_deg)
+    for b in bags:
+        try:
+            with AnyReader([b]) as r:
+                for conn, ts_ns, raw in r.messages(connections=[c for c in r.connections if c.topic == '/mavros/global_position/compass_hdg']):
+                    t = ts_ns / 1e9
+                    if t_start - 60 <= t <= t_end + 60:
+                        m = r.deserialize(raw, conn.msgtype)
+                        headings.append((t, m.data))
+        except Exception as e:
+            print(f'[warn] {b.name}: {e}', flush=True)
+    headings.sort()
+    return headings
+
+def nearest_hdg(headings, t_target):
+    if not headings: return None
+    ts = [h[0] for h in headings]
+    idx = np.searchsorted(ts, t_target)
+    if idx == 0: return headings[0][1]
+    if idx >= len(headings): return headings[-1][1]
+    if abs(headings[idx][0] - t_target) < abs(headings[idx-1][0] - t_target):
+        return headings[idx][1]
+    return headings[idx-1][1]
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--bag-dir', required=True)
+    ap.add_argument('--altitude', type=float, default=1.5)
+    ap.add_argument('--fov-deg', type=float, default=122.0)
+    ap.add_argument('--fps', type=float, default=1.0)
+    ap.add_argument('--start-iso', default='2026-05-05T00:00:00')
+    ap.add_argument('--label', default='segment')
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--ref-csv', default=None)
+    args = ap.parse_args()
+    
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    print(f'[dvl] {len(frames)} frames', flush=True)
+    
+    t0 = datetime.fromisoformat(args.start_iso).timestamp()
+    t_end = t0 + len(frames) / args.fps
+    
+    # Load heading
+    print(f'[hdg] loading from {args.bag_dir}', flush=True)
+    headings = load_heading(args.bag_dir, t0, t_end)
+    print(f'[hdg] {len(headings)} samples loaded, t range: {headings[0][0]:.0f}-{headings[-1][0]:.0f}', flush=True)
+    
+    W, H = 518, 294
+    f = (W/2) / math.tan(math.radians(args.fov_deg/2))
+    px_to_m = args.altitude / f
+    print(f'[dvl] px_to_m={px_to_m:.5f}', flush=True)
+    
+    rows = []
+    rows.append({'frame_idx': 0, 'ts_s': t0, 'heading_deg': nearest_hdg(headings, t0) or 0, 'flow_x_px': 0, 'flow_y_px': 0, 
+                 'speed_mps': 0, 'east_m': 0, 'north_m': 0})
+    
+    prev = cv2.imread(str(frames[0]), cv2.IMREAD_GRAYSCALE)
+    east_cum, north_cum = 0.0, 0.0
+    
+    for i in range(1, len(frames)):
+        curr = cv2.imread(str(frames[i]), cv2.IMREAD_GRAYSCALE)
+        if curr is None: continue
+        t_frame = t0 + i / args.fps
+        hdg = nearest_hdg(headings, t_frame) or 0
+        
+        flow = cv2.calcOpticalFlowFarneback(prev, curr, None, 0.5, 3, 21, 3, 5, 1.2, 0)
+        fx_cam = np.median(flow[..., 0])
+        fy_cam = np.median(flow[..., 1])
+        
+        # convert px → m in CAM frame (cam right = +X_cam, cam down = +Y_cam image coord)
+        dx_cam = -fx_cam * px_to_m  # AUV moves opposite to flow
+        dy_cam = -fy_cam * px_to_m
+        
+        # Apply heading rotation: cam +X_cam = body forward? assume cam frame Y axis = AUV forward
+        # The downward camera: cam +Y_image = body forward typically (or -Y if mounted otherwise)
+        # heading = degrees clockwise from North in body frame
+        # World rotation: rotate body (dy_cam = forward, dx_cam = right) by heading angle from north
+        hdg_rad = math.radians(hdg)
+        # body forward (north when hdg=0) component:
+        # body_forward_m = dy_cam (assuming cam Y_image = forward)
+        # body_right_m = dx_cam
+        body_forward = dy_cam  # may need sign flip depending on mounting; we'll see
+        body_right = dx_cam
+        # world East = forward*sin(hdg) + right*cos(hdg)
+        # world North = forward*cos(hdg) - right*sin(hdg)
+        de = body_forward * math.sin(hdg_rad) + body_right * math.cos(hdg_rad)
+        dn = body_forward * math.cos(hdg_rad) - body_right * math.sin(hdg_rad)
+        
+        east_cum += de
+        north_cum += dn
+        speed_mps = math.sqrt(de**2 + dn**2) * args.fps
+        
+        rows.append({'frame_idx': i, 'ts_s': t_frame, 'heading_deg': hdg, 'flow_x_px': float(fx_cam), 'flow_y_px': float(fy_cam),
+                     'speed_mps': speed_mps, 'east_m': east_cum, 'north_m': north_cum})
+        prev = curr
+        if i % 100 == 0:
+            print(f'[dvl] {i}/{len(frames)} hdg={hdg:.1f}° flow=({fx_cam:.1f},{fy_cam:.1f}) pos=({east_cum:.2f},{north_cum:.2f})', flush=True)
+    
+    print(f'[dvl] done. Final ENU: ({east_cum:.2f}, {north_cum:.2f}) m', flush=True)
+    
+    with open(args.out, 'w', newline='') as ff:
+        w = csv.DictWriter(ff, fieldnames=list(rows[0].keys()))
+        w.writeheader(); w.writerows(rows)
+    print(f'[out] {args.out}', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_hdg, ax_speed, ax_cmp = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        e = [r['east_m'] for r in rows]
+        n = [r['north_m'] for r in rows]
+        ax_xy.plot(e, n, '-b', linewidth=1.2)
+        ax_xy.plot(e[0], n[0], 'go', markersize=10, label='start')
+        ax_xy.plot(e[-1], n[-1], 'r^', markersize=10, label='end')
+        ax_xy.set_xlabel('East (m)'); ax_xy.set_ylabel('North (m)'); ax_xy.set_title('DVL + IMU heading trajectory')
+        ax_xy.set_aspect('equal'); ax_xy.legend(); ax_xy.grid(True, alpha=0.3)
+        
+        hdgs = [r['heading_deg'] for r in rows]
+        ax_hdg.plot(range(len(rows)), hdgs, '-c'); ax_hdg.set_xlabel('Frame'); ax_hdg.set_ylabel('Heading (deg)'); ax_hdg.set_title('Compass heading from MCAP'); ax_hdg.grid(True, alpha=0.3)
+        
+        speeds = [r['speed_mps'] for r in rows]
+        ax_speed.plot(range(len(rows)), speeds, '-r'); ax_speed.set_xlabel('Frame'); ax_speed.set_ylabel('Speed m/s'); ax_speed.set_title('Speed over time'); ax_speed.grid(True, alpha=0.3)
+        
+        if args.ref_csv:
+            try:
+                with open(args.ref_csv) as fff:
+                    refrows = [r for r in csv.DictReader(fff) if r.get('segment','')==args.label or r.get('label','')==args.label]
+                rx = [float(r['x']) for r in refrows]
+                ry = [float(r['y']) for r in refrows]
+                ax_cmp.plot(e, n, '-b', linewidth=1.2, label='DVL+IMU', alpha=0.7)
+                ax_cmp.plot(rx, ry, '-r', linewidth=1.2, label='lingbot', alpha=0.7)
+                ax_cmp.set_xlabel('X/East'); ax_cmp.set_ylabel('Y/North'); ax_cmp.set_title('Comparison'); ax_cmp.set_aspect('equal'); ax_cmp.legend(); ax_cmp.grid(True, alpha=0.3)
+            except Exception as e: print(f'[ref] {e}')
+        else:
+            ax_cmp.set_title('(no reference)')
+        
+        plt.suptitle(f'DVL+IMU heading — {args.label}')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/loop_closure_lightglue.py
+++ b/scripts/loop_closure_lightglue.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""Loop closure detection via LightGlue (SuperPoint + LightGlue matcher).
+
+Pipeline:
+ 1. Read DVL trajectory CSV (raw east_m,north_m per frame).
+ 2. Build candidate pairs (i, j) with |i-j| > min_sep.
+    Sample stratifie if > max_pairs.
+ 3. Send pairs + frames to GPU host (.87) via SSH; LightGlue runs there.
+ 4. Filter pairs with n_high > match_threshold = loop closures.
+ 5. Apply linear-ramp correction (same algo as pHash variant): for each LC,
+    pull frame j back to frame i, distribute drift across [i+1..j] linearly
+    and carry offset forward for k > j.
+
+Usage:
+  python3 loop_closure_lightglue.py \
+      --frames-dir /tmp/frames_GX019818/ \
+      --dvl-csv /tmp/dvl_full_GX019818.csv \
+      --out-corrected /tmp/dvl_lightglue_GX019818.csv \
+      --plot /tmp/loop_closure_lightglue.png \
+      --min-sep 60 --match-threshold 50 --max-pairs 30000 \
+      --gpu-host 192.168.0.87 --gpu-user floppyrj45 \
+      --gpu-frames-dir /home/floppyrj45/lightglue-test/frames_GX019818 \
+      --gpu-venv /home/floppyrj45/lightglue-test/venv \
+      --gpu-worker /home/floppyrj45/lightglue-test/lightglue_pairs_worker.py
+"""
+import argparse
+import csv
+import math
+import os
+import random
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import numpy as np
+
+
+def stratified_pairs(n_frames, min_sep, max_pairs, seed=42):
+    """Sample pairs (i,j) with |i-j| > min_sep, stratified by separation bucket.
+
+    Tries to get good coverage: for each separation range [min_sep..2*min_sep],
+    [2*min_sep..4*min_sep], ..., draw equal share. Plus all-i to random-j fallback.
+    """
+    rng = random.Random(seed)
+    pairs = set()
+
+    # Brute force for small N: all pairs |i-j|>min_sep then truncate
+    full_count = 0
+    for i in range(n_frames):
+        for j in range(i + min_sep + 1, n_frames):
+            full_count += 1
+    if full_count <= max_pairs:
+        for i in range(n_frames):
+            for j in range(i + min_sep + 1, n_frames):
+                pairs.add((i, j))
+        out = sorted(pairs)
+        return out
+
+    # Stratified buckets by log separation
+    deltas = []
+    d = min_sep + 1
+    while d < n_frames:
+        deltas.append(d)
+        d = int(d * 1.7) + 1
+    deltas.append(n_frames)
+    buckets = list(zip(deltas[:-1], deltas[1:]))
+    if not buckets:
+        buckets = [(min_sep + 1, n_frames)]
+    per_bucket = max_pairs // len(buckets)
+
+    for (lo, hi) in buckets:
+        attempts = 0
+        added = 0
+        while added < per_bucket and attempts < per_bucket * 20:
+            attempts += 1
+            i = rng.randrange(n_frames)
+            delta = rng.randint(lo, max(lo + 1, hi - 1))
+            j = i + delta
+            if j >= n_frames:
+                j = i - delta
+            if 0 <= j < n_frames and abs(i - j) > min_sep:
+                a, b = min(i, j), max(i, j)
+                if (a, b) not in pairs:
+                    pairs.add((a, b))
+                    added += 1
+    out = sorted(pairs)
+    return out
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--dvl-csv', required=True)
+    ap.add_argument('--out-corrected', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--min-sep', type=int, default=60)
+    ap.add_argument('--match-threshold', type=int, default=50)
+    ap.add_argument('--max-pairs', type=int, default=30000)
+    ap.add_argument('--gpu-host', default='192.168.0.87')
+    ap.add_argument('--gpu-user', default='floppyrj45')
+    ap.add_argument('--gpu-frames-dir', default='/home/floppyrj45/lightglue-test/frames_GX019818')
+    ap.add_argument('--gpu-venv', default='/home/floppyrj45/lightglue-test/venv')
+    ap.add_argument('--gpu-worker', default='/home/floppyrj45/lightglue-test/lightglue_pairs_worker.py')
+    ap.add_argument('--remote-pairs-path', default='/tmp/lg_pairs.txt')
+    ap.add_argument('--remote-out-path', default='/tmp/lg_matches.csv')
+    ap.add_argument('--n-positions-cap', type=int, default=0,
+                    help='if >0, cap n_positions used for pair generation (must match GPU frames count)')
+    args = ap.parse_args()
+
+    # Map DVL CSV rows to frames present locally — we need positions in *sorted frames* on GPU host.
+    # We assume frame_idx in CSV matches file name 'frame_NNNN.jpg' with NNNN = frame_idx+1 zero-padded
+    # OR matches sorted index. Since file names are sequential (frame_0001..frame_1451) and DVL has 1663
+    # rows, only frames 0..1450 are physically present. We restrict LC search to those rows AND only
+    # frames whose file exists.
+
+    frames_dir = Path(args.frames_dir)
+    local_frames = sorted(p.name for p in frames_dir.iterdir()
+                          if p.suffix.lower() in ('.jpg', '.jpeg', '.png'))
+    # local_frames sorted == what worker will sort → indices align across hosts.
+
+    # Map frame_name "frame_0001.jpg" -> 1-based number -> 0-based dvl frame_idx = num-1
+    def name_to_dvl_idx(name):
+        stem = Path(name).stem  # frame_0001
+        num = int(stem.split('_')[1])
+        return num - 1  # 0-based
+
+    pos_to_dvl = [name_to_dvl_idx(n) for n in local_frames]
+    n_positions = len(local_frames)
+    if args.n_positions_cap and args.n_positions_cap < n_positions:
+        n_positions = args.n_positions_cap
+        pos_to_dvl = pos_to_dvl[:n_positions]
+    print(f'[lc] positions used for pairs: {n_positions}', flush=True)
+
+    # DVL CSV
+    dvl_rows = list(csv.DictReader(open(args.dvl_csv)))
+    e_full = np.array([float(r['east_m']) for r in dvl_rows])
+    n_full = np.array([float(r['north_m']) for r in dvl_rows])
+    n_full_rows = len(dvl_rows)
+    print(f'[lc] dvl rows: {n_full_rows}', flush=True)
+
+    # Build candidate pairs over *positions* (worker indexes positions of sorted frames)
+    pairs_pos = stratified_pairs(n_positions, args.min_sep, args.max_pairs)
+    print(f'[lc] candidate pairs: {len(pairs_pos)}', flush=True)
+
+    # Write pairs file locally then scp to GPU host
+    with tempfile.NamedTemporaryFile('w', delete=False, suffix='.txt') as f:
+        pairs_local_path = f.name
+        for i, j in pairs_pos:
+            f.write(f'{i},{j}\n')
+    print(f'[lc] wrote pairs file {pairs_local_path}', flush=True)
+
+    scp_cmd = ['scp', '-o', 'StrictHostKeyChecking=no', pairs_local_path,
+               f'{args.gpu_user}@{args.gpu_host}:{args.remote_pairs_path}']
+    subprocess.run(scp_cmd, check=True)
+    print(f'[lc] uploaded pairs to {args.gpu_host}:{args.remote_pairs_path}', flush=True)
+
+    # Run worker remotely
+    remote_cmd = (
+        f'source {args.gpu_venv}/bin/activate && '
+        f'python3 {args.gpu_worker} '
+        f'--frames-dir {args.gpu_frames_dir} '
+        f'--pairs-file {args.remote_pairs_path} '
+        f'--out-file {args.remote_out_path} '
+        f'--score-thr 0.5'
+    )
+    ssh_cmd = ['ssh', '-o', 'StrictHostKeyChecking=no',
+               f'{args.gpu_user}@{args.gpu_host}', remote_cmd]
+    print(f'[lc] invoking worker remotely ...', flush=True)
+    r = subprocess.run(ssh_cmd)
+    if r.returncode != 0:
+        print(f'[lc] remote worker failed rc={r.returncode}', file=sys.stderr)
+        sys.exit(r.returncode)
+
+    # Pull back matches CSV
+    local_matches = '/tmp/lg_matches_local.csv'
+    subprocess.run(['scp', '-o', 'StrictHostKeyChecking=no',
+                    f'{args.gpu_user}@{args.gpu_host}:{args.remote_out_path}', local_matches],
+                   check=True)
+    print(f'[lc] pulled matches to {local_matches}', flush=True)
+
+    # Parse matches, filter
+    loops = []  # (dvl_i, dvl_j, n_high)
+    with open(local_matches) as f:
+        next(f)  # header
+        for line in f:
+            parts = line.strip().split(',')
+            if len(parts) < 4:
+                continue
+            pi, pj, n_total, n_high = int(parts[0]), int(parts[1]), int(parts[2]), int(parts[3])
+            if n_high > args.match_threshold:
+                di = pos_to_dvl[pi]
+                dj = pos_to_dvl[pj]
+                if di > dj:
+                    di, dj = dj, di
+                if dj - di > args.min_sep:
+                    loops.append((di, dj, n_high))
+    print(f'[lc] kept {len(loops)} loop closures (n_high > {args.match_threshold})', flush=True)
+
+    # Apply linear-ramp correction (same as phash variant)
+    e_corr = e_full.copy()
+    n_corr = n_full.copy()
+    n_applied = 0
+    # Sort loops by i ascending then by j ascending so corrections are applied left to right
+    loops.sort(key=lambda x: (x[0], x[1]))
+    for i, j, nh in loops:
+        if j >= len(e_corr):
+            continue
+        dx = e_corr[i] - e_corr[j]
+        dy = n_corr[i] - n_corr[j]
+        nsteps = j - i
+        for k in range(i + 1, j + 1):
+            ratio = (k - i) / nsteps
+            e_corr[k] += dx * ratio
+            n_corr[k] += dy * ratio
+        for k in range(j + 1, len(e_corr)):
+            e_corr[k] += dx
+            n_corr[k] += dy
+        n_applied += 1
+    print(f'[lc] applied {n_applied} corrections', flush=True)
+
+    with open(args.out_corrected, 'w', newline='') as f:
+        w = csv.writer(f)
+        w.writerow(['frame_idx', 'ts_s', 'east_m_orig', 'north_m_orig', 'east_m_corr', 'north_m_corr', 'n_loops'])
+        for k, r in enumerate(dvl_rows):
+            w.writerow([r['frame_idx'], r['ts_s'], e_full[k], n_full[k], e_corr[k], n_corr[k],
+                        n_applied if k == 0 else ''])
+    print(f'[out] {args.out_corrected}', flush=True)
+
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(1, 2, figsize=(14, 7))
+        axes[0].plot(e_full, n_full, '-b', lw=1)
+        axes[0].plot(e_full[0], n_full[0], 'go', ms=10)
+        axes[0].plot(e_full[-1], n_full[-1], 'r^', ms=10)
+        axes[0].set_title(f'RAW DVL\nbbox={e_full.max()-e_full.min():.1f}x{n_full.max()-n_full.min():.1f}m')
+        axes[0].set_xlabel('East m'); axes[0].set_ylabel('North m'); axes[0].set_aspect('equal'); axes[0].grid(alpha=0.3)
+        axes[1].plot(e_corr, n_corr, '-r', lw=1)
+        axes[1].plot(e_corr[0], n_corr[0], 'go', ms=10)
+        axes[1].plot(e_corr[-1], n_corr[-1], 'r^', ms=10)
+        axes[1].set_title(f'LightGlue LC ({n_applied} loops)\nbbox={e_corr.max()-e_corr.min():.1f}x{n_corr.max()-n_corr.min():.1f}m')
+        axes[1].set_xlabel('East m'); axes[1].set_ylabel('North m'); axes[1].set_aspect('equal'); axes[1].grid(alpha=0.3)
+        plt.suptitle(f'LightGlue loop closure — GX019818 (thr={args.match_threshold})')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/loop_closure_phash.py
+++ b/scripts/loop_closure_phash.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+"""Loop closure detection via perceptual hashing.
+
+For each frame, compute pHash (DCT-based perceptual hash).
+Find pairs (i, j) with |i-j| > MIN_SEPARATION and hash distance < THRESHOLD.
+These are loop closures — AUV revisited same physical location.
+
+Then correct DVL trajectory by snapping back at loop closures.
+
+Usage:
+  python3 loop_closure_phash.py --frames-dir <dir> --dvl-csv <csv> \
+    --out-corrected /tmp/dvl_loopclosed.csv --plot /tmp/loop_closure.png \
+    --min-sep 60 --max-dist 8
+"""
+import argparse, csv, math
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import imagehash
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--dvl-csv', required=True)
+    ap.add_argument('--out-corrected', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--min-sep', type=int, default=60, help='min frame separation to count as loop')
+    ap.add_argument('--max-dist', type=int, default=10, help='max pHash Hamming distance for match')
+    ap.add_argument('--hash-size', type=int, default=8)
+    args = ap.parse_args()
+    
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    print(f'[loop] hashing {len(frames)} frames (pHash size {args.hash_size})...', flush=True)
+    
+    hashes = []
+    for i, f in enumerate(frames):
+        img = Image.open(f)
+        h = imagehash.phash(img, hash_size=args.hash_size)
+        hashes.append(h)
+        if i % 200 == 0: print(f'  hashed {i}/{len(frames)}', flush=True)
+    
+    print(f'[loop] searching loop closures (min_sep={args.min_sep}, max_dist={args.max_dist})...', flush=True)
+    loops = []  # list of (i, j, distance)
+    for i in range(len(hashes)):
+        for j in range(i + args.min_sep, len(hashes)):
+            d = hashes[i] - hashes[j]
+            if d <= args.max_dist:
+                loops.append((i, j, d))
+        if i % 200 == 0: print(f'  search at {i}, loops found so far: {len(loops)}', flush=True)
+    
+    print(f'[loop] found {len(loops)} loop closures', flush=True)
+    
+    # Load DVL trajectory
+    dvl_rows = list(csv.DictReader(open(args.dvl_csv)))
+    e = np.array([float(r['east_m']) for r in dvl_rows])
+    n = np.array([float(r['north_m']) for r in dvl_rows])
+    
+    # Simple correction: for each loop closure (i, j), interpolate a rigid correction
+    # over [i, j] to bring j back to i's position
+    # We'll apply gradual correction: for k in [i, j], offset by linear ramp
+    e_corr = e.copy(); n_corr = n.copy()
+    n_corrections = 0
+    for i, j, d in loops:
+        if j >= len(e_corr): continue
+        dx = e_corr[i] - e_corr[j]
+        dy = n_corr[i] - n_corr[j]
+        # spread correction linearly over [i+1, j]
+        nsteps = j - i
+        for k in range(i+1, j+1):
+            ratio = (k - i) / nsteps
+            e_corr[k] += dx * ratio
+            n_corr[k] += dy * ratio
+        # carry forward the offset to all frames after j
+        for k in range(j+1, len(e_corr)):
+            e_corr[k] += dx
+            n_corr[k] += dy
+        n_corrections += 1
+    
+    print(f'[loop] applied {n_corrections} corrections to trajectory', flush=True)
+    
+    with open(args.out_corrected, 'w', newline='') as ff:
+        w = csv.writer(ff)
+        w.writerow(['frame_idx','ts_s','east_m_orig','north_m_orig','east_m_corr','north_m_corr'])
+        for k, r in enumerate(dvl_rows):
+            w.writerow([r['frame_idx'], r['ts_s'], e[k], n[k], e_corr[k], n_corr[k]])
+    print(f'[out] {args.out_corrected}', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_orig, ax_corr, ax_pairs, ax_dist = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        ax_orig.plot(e, n, '-b', linewidth=1.0); ax_orig.plot(e[0], n[0], 'go', markersize=10); ax_orig.plot(e[-1], n[-1], 'r^', markersize=10)
+        ax_orig.set_title(f'DVL trajectory ORIGINAL (drift visible)\nbbox={max(e)-min(e):.1f}×{max(n)-min(n):.1f}m')
+        ax_orig.set_xlabel('East (m)'); ax_orig.set_ylabel('North (m)'); ax_orig.set_aspect('equal'); ax_orig.grid(True, alpha=0.3)
+        
+        ax_corr.plot(e_corr, n_corr, '-r', linewidth=1.0); ax_corr.plot(e_corr[0], n_corr[0], 'go', markersize=10); ax_corr.plot(e_corr[-1], n_corr[-1], 'r^', markersize=10)
+        ax_corr.set_title(f'DVL trajectory + LOOP CLOSURE\nbbox={max(e_corr)-min(e_corr):.1f}×{max(n_corr)-min(n_corr):.1f}m\nLoops applied: {n_corrections}')
+        ax_corr.set_xlabel('East (m)'); ax_corr.set_ylabel('North (m)'); ax_corr.set_aspect('equal'); ax_corr.grid(True, alpha=0.3)
+        
+        # plot loop pairs as lines on original
+        ax_pairs.plot(e, n, '-', color='gray', linewidth=0.5, alpha=0.4)
+        for i, j, d in loops[:200]:  # show first 200 pairs
+            ax_pairs.plot([e[i], e[j]], [n[i], n[j]], '-', color='orange', linewidth=0.4, alpha=0.3)
+        ax_pairs.set_title(f'Loop closure pairs (first 200, of {len(loops)})')
+        ax_pairs.set_xlabel('East'); ax_pairs.set_ylabel('North'); ax_pairs.set_aspect('equal'); ax_pairs.grid(True, alpha=0.3)
+        
+        # histogram of loop distances
+        dists = [d for _,_,d in loops]
+        if dists:
+            ax_dist.hist(dists, bins=range(0, max(dists)+2))
+            ax_dist.set_xlabel('Hash Hamming distance'); ax_dist.set_ylabel('Count'); ax_dist.set_title('Loop closure hash distance distribution'); ax_dist.grid(True, alpha=0.3)
+        
+        plt.suptitle(f'Loop closure detection (pHash {args.hash_size}, min_sep={args.min_sep}, max_dist={args.max_dist}) — GX039839')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/loop_closure_sweep.py
+++ b/scripts/loop_closure_sweep.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Sweep loop closure params on cached hashes."""
+import argparse, csv, math, pickle
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import imagehash
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--dvl-csv', required=True)
+    ap.add_argument('--hash-cache', default='/tmp/phash_cache.pkl')
+    ap.add_argument('--hash-size', type=int, default=16)  # bigger for finer discrimination
+    ap.add_argument('--out-plot', required=True)
+    ap.add_argument('--min-sep', type=int, default=60)
+    args = ap.parse_args()
+    
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    
+    # Cache or compute hashes
+    cache_path = Path(args.hash_cache)
+    if cache_path.exists():
+        with open(cache_path,'rb') as f:
+            d = pickle.load(f)
+        if d.get('frames_count') == len(frames) and d.get('hash_size') == args.hash_size and d.get('frames_dir') == str(args.frames_dir):
+            hashes = d['hashes']
+            print(f'[cache] loaded {len(hashes)} hashes from {cache_path}', flush=True)
+        else:
+            cache_path = None
+    if not cache_path or not cache_path.exists():
+        print(f'[hash] computing {len(frames)} pHashes (size={args.hash_size})...', flush=True)
+        hashes = []
+        for i, f in enumerate(frames):
+            h = imagehash.phash(Image.open(f), hash_size=args.hash_size)
+            hashes.append(h)
+            if i % 200 == 0: print(f'  {i}/{len(frames)}', flush=True)
+        with open(args.hash_cache,'wb') as f:
+            pickle.dump({'hashes': hashes, 'frames_count': len(frames), 'hash_size': args.hash_size, 'frames_dir': str(args.frames_dir)}, f)
+        print(f'[cache] saved to {args.hash_cache}', flush=True)
+    
+    # max_dist for hash_size=16 is ~256 bits; scale threshold accordingly
+    # for hash 8: dist 8 ~12%, for hash 16: dist 32 ~12%
+    # try thresholds at 5%, 8%, 12%, 18%
+    n_bits = args.hash_size * args.hash_size
+    thresholds = [int(n_bits*0.05), int(n_bits*0.08), int(n_bits*0.12), int(n_bits*0.18)]
+    print(f'[loop] hash bits={n_bits}, sweep thresholds: {thresholds}', flush=True)
+    
+    dvl_rows = list(csv.DictReader(open(args.dvl_csv)))
+    e_orig = np.array([float(r['east_m']) for r in dvl_rows])
+    n_orig = np.array([float(r['north_m']) for r in dvl_rows])
+    
+    def find_loops_and_correct(max_dist):
+        loops = []
+        for i in range(len(hashes)):
+            for j in range(i + args.min_sep, len(hashes)):
+                d = hashes[i] - hashes[j]
+                if d <= max_dist:
+                    loops.append((i, j, d))
+        e_c = e_orig.copy(); n_c = n_orig.copy()
+        for i, j, d in loops:
+            if j >= len(e_c): continue
+            dx = e_c[i] - e_c[j]; dy = n_c[i] - n_c[j]
+            ns = j - i
+            for k in range(i+1, j+1):
+                ratio = (k-i)/ns
+                e_c[k] += dx*ratio; n_c[k] += dy*ratio
+            for k in range(j+1, len(e_c)):
+                e_c[k] += dx; n_c[k] += dy
+        return loops, e_c, n_c
+    
+    import matplotlib
+    matplotlib.use('Agg')
+    import matplotlib.pyplot as plt
+    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
+    
+    # original
+    ax = axes[0,0]
+    ax.plot(e_orig, n_orig, '-b', linewidth=1.0)
+    ax.plot(e_orig[0], n_orig[0], 'go', markersize=10); ax.plot(e_orig[-1], n_orig[-1], 'r^', markersize=10)
+    bbox=(max(e_orig)-min(e_orig), max(n_orig)-min(n_orig))
+    ax.set_title(f'ORIGINAL (no LC)\nbbox={bbox[0]:.1f}×{bbox[1]:.1f}m')
+    ax.set_xlabel('East'); ax.set_ylabel('North'); ax.set_aspect('equal'); ax.grid(True, alpha=0.3)
+    
+    # corrected for each threshold
+    positions = [(0,1), (0,2), (1,0), (1,1)]
+    for idx, t in enumerate(thresholds):
+        if idx >= len(positions): break
+        loops, e_c, n_c = find_loops_and_correct(t)
+        ax = axes[positions[idx]]
+        ax.plot(e_c, n_c, '-r', linewidth=1.0)
+        ax.plot(e_c[0], n_c[0], 'go', markersize=10); ax.plot(e_c[-1], n_c[-1], 'r^', markersize=10)
+        bbox=(max(e_c)-min(e_c), max(n_c)-min(n_c))
+        end_dist = math.sqrt(e_c[-1]**2 + n_c[-1]**2)
+        ax.set_title(f'max_dist={t} ({t/n_bits*100:.0f}% bits)\n{len(loops)} loops  bbox={bbox[0]:.1f}×{bbox[1]:.1f}m  end={end_dist:.1f}m')
+        ax.set_xlabel('East'); ax.set_ylabel('North'); ax.set_aspect('equal'); ax.grid(True, alpha=0.3)
+        print(f'[t={t}] loops={len(loops)} bbox={bbox} end_dist={end_dist:.1f}', flush=True)
+    
+    # summary: end_dist vs threshold
+    ax = axes[1,2]
+    end_dists = []
+    for t in thresholds:
+        loops, e_c, n_c = find_loops_and_correct(t)
+        end_dists.append((t, len(loops), math.sqrt(e_c[-1]**2+n_c[-1]**2)))
+    ts = [x[0] for x in end_dists]
+    counts = [x[1] for x in end_dists]
+    ed = [x[2] for x in end_dists]
+    ax2 = ax.twinx()
+    ax.plot(ts, counts, 'b-o', label='loop count'); ax.set_ylabel('Loops found', color='b')
+    ax2.plot(ts, ed, 'r-s', label='end_dist'); ax2.set_ylabel('end_dist (m)', color='r')
+    ax.set_xlabel('max_dist threshold'); ax.set_title('Threshold sweep summary')
+    ax.grid(True, alpha=0.3)
+    
+    plt.suptitle(f'Loop closure threshold sweep — GX039839 (pHash size {args.hash_size}, min_sep {args.min_sep})')
+    plt.tight_layout()
+    plt.savefig(args.out_plot, dpi=120, bbox_inches='tight')
+    print(f'[plot] {args.out_plot}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/photomosaic_overlay.py
+++ b/scripts/photomosaic_overlay.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+"""Photomosaic overlay: place each frame at (east, north, heading) on 2D canvas.
+
+KISS: cv2 only, running-mean compositing.
+"""
+import argparse
+import csv
+import glob
+import math
+import os
+import sys
+import time
+
+import cv2
+import numpy as np
+
+
+def load_traj(path):
+    """Return list of dicts with frame_idx, east, north, heading."""
+    rows = []
+    with open(path) as f:
+        rdr = csv.DictReader(f)
+        for r in rdr:
+            try:
+                fi = int(r["frame_idx"])
+            except (KeyError, ValueError):
+                continue
+            # Prefer corrected east/north, fallback to raw east_m/north_m
+            if "east_m_corr" in r and r["east_m_corr"] != "":
+                e = float(r["east_m_corr"])
+                n = float(r["north_m_corr"])
+            elif "east_m" in r:
+                e = float(r["east_m"])
+                n = float(r["north_m"])
+            else:
+                continue
+            h = float(r["heading_deg"]) if "heading_deg" in r and r["heading_deg"] != "" else None
+            rows.append({"frame_idx": fi, "east": e, "north": n, "heading": h})
+    return rows
+
+
+def attach_headings(traj, heading_csv):
+    """Join heading_deg from secondary CSV by frame_idx (loopclosed CSVs miss heading)."""
+    if all(r["heading"] is not None for r in traj):
+        return traj
+    by_idx = {}
+    with open(heading_csv) as f:
+        rdr = csv.DictReader(f)
+        for r in rdr:
+            try:
+                by_idx[int(r["frame_idx"])] = float(r["heading_deg"])
+            except (KeyError, ValueError):
+                pass
+    for r in traj:
+        if r["heading"] is None:
+            r["heading"] = by_idx.get(r["frame_idx"], 0.0)
+    return traj
+
+
+def find_frame(frames_dir, frame_idx):
+    """Try both naming conventions: frame_0001.jpg or frame_00001.jpg."""
+    for digits in (4, 5, 6):
+        p = os.path.join(frames_dir, f"frame_{frame_idx+1:0{digits}d}.jpg")
+        if os.path.exists(p):
+            return p
+        p = os.path.join(frames_dir, f"frame_{frame_idx:0{digits}d}.jpg")
+        if os.path.exists(p):
+            return p
+    return None
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--frames-dir", required=True)
+    ap.add_argument("--traj-csv", required=True)
+    ap.add_argument("--heading-csv", default=None, help="Fallback CSV for heading_deg if missing")
+    ap.add_argument("--altitude", type=float, default=1.5)
+    ap.add_argument("--fov-h", type=float, default=122.0)
+    ap.add_argument("--fov-v", type=float, default=80.0)
+    ap.add_argument("--alpha", type=float, default=0.3)
+    ap.add_argument("--sample-every", type=int, default=5)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--max-canvas-px", type=int, default=4000)
+    ap.add_argument("--heading-sign", type=int, default=-1, help="-1 for clockwise-from-north (default)")
+    args = ap.parse_args()
+
+    t0 = time.time()
+
+    # 1. Load trajectory
+    traj = load_traj(args.traj_csv)
+    if args.heading_csv:
+        traj = attach_headings(traj, args.heading_csv)
+    else:
+        # Auto-fallback: try dvl_full_*.csv next to traj_csv
+        if any(r["heading"] is None for r in traj):
+            base = os.path.basename(args.traj_csv)
+            # Extract video tag like GX039839 / GX019818
+            for token in base.replace(".", "_").split("_"):
+                if token.startswith("GX") and len(token) >= 6:
+                    cand = f"/tmp/dvl_full_{token}.csv"
+                    if os.path.exists(cand):
+                        print(f"[heading] joining from {cand}")
+                        traj = attach_headings(traj, cand)
+                        break
+
+    if not traj:
+        print("ERROR: empty trajectory", file=sys.stderr)
+        sys.exit(1)
+
+    # Sample
+    traj = traj[:: args.sample_every]
+    print(f"[traj] {len(traj)} frames after sampling (every {args.sample_every})")
+
+    # 2. Footprint at altitude
+    fp_w = 2.0 * args.altitude * math.tan(math.radians(args.fov_h / 2.0))
+    fp_h = 2.0 * args.altitude * math.tan(math.radians(args.fov_v / 2.0))
+    print(f"[footprint] {fp_w:.2f}m wide x {fp_h:.2f}m tall (alt={args.altitude}m)")
+
+    # 3. World bbox + margin
+    es = [r["east"] for r in traj]
+    ns = [r["north"] for r in traj]
+    margin = 1.5 * max(fp_w, fp_h)
+    e_min, e_max = min(es) - margin, max(es) + margin
+    n_min, n_max = min(ns) - margin, max(ns) + margin
+    world_w = e_max - e_min
+    world_h = n_max - n_min
+    print(f"[bbox] east [{e_min:.1f},{e_max:.1f}] north [{n_min:.1f},{n_max:.1f}] = {world_w:.1f}m x {world_h:.1f}m")
+
+    # 4. Canvas pixel size
+    ppm = args.max_canvas_px / max(world_w, world_h)
+    canvas_w = int(world_w * ppm)
+    canvas_h = int(world_h * ppm)
+    print(f"[canvas] {canvas_w}x{canvas_h} px ({ppm:.1f} px/m)")
+
+    # Compositing buffers: sum (float32 BGR) + count (int)
+    acc = np.zeros((canvas_h, canvas_w, 3), dtype=np.float32)
+    cnt = np.zeros((canvas_h, canvas_w), dtype=np.int32)
+
+    fp_px_w = max(2, int(fp_w * ppm))
+    fp_px_h = max(2, int(fp_h * ppm))
+    print(f"[footprint-px] {fp_px_w}x{fp_px_h}")
+
+    placed = 0
+    skipped = 0
+    for i, r in enumerate(traj):
+        path = find_frame(args.frames_dir, r["frame_idx"])
+        if not path:
+            skipped += 1
+            continue
+        img = cv2.imread(path)
+        if img is None:
+            skipped += 1
+            continue
+
+        # Resize image to footprint pixel size first (keep aspect)
+        img_resized = cv2.resize(img, (fp_px_w, fp_px_h), interpolation=cv2.INTER_AREA)
+
+        # Rotate by heading. Convention: heading 0 = north, positive clockwise.
+        # We rotate image so image "up" aligns with north direction.
+        # cv2 rotation positive = counterclockwise → use -heading * sign
+        heading = r["heading"] if r["heading"] is not None else 0.0
+        angle = args.heading_sign * heading  # default -1 = clockwise
+
+        # Build canvas the size of the rotated bounding box
+        diag = int(math.ceil(math.sqrt(fp_px_w ** 2 + fp_px_h ** 2)))
+        # Pad to diag x diag for safe rotation
+        pad_h = (diag - fp_px_h) // 2
+        pad_w = (diag - fp_px_w) // 2
+        padded = cv2.copyMakeBorder(
+            img_resized, pad_h, diag - fp_px_h - pad_h,
+            pad_w, diag - fp_px_w - pad_w,
+            cv2.BORDER_CONSTANT, value=0,
+        )
+        # Mask = 1 where valid pixels
+        mask = np.zeros((padded.shape[0], padded.shape[1]), dtype=np.uint8)
+        mask[pad_h:pad_h + fp_px_h, pad_w:pad_w + fp_px_w] = 255
+
+        M = cv2.getRotationMatrix2D((diag / 2, diag / 2), angle, 1.0)
+        rotated = cv2.warpAffine(padded, M, (diag, diag), flags=cv2.INTER_LINEAR, borderValue=0)
+        rotated_mask = cv2.warpAffine(mask, M, (diag, diag), flags=cv2.INTER_NEAREST, borderValue=0)
+
+        # Place at world (east, north).
+        # Canvas: x = east (left→right), y = -north (top→bottom, north up)
+        cx_world = r["east"]
+        cy_world = r["north"]
+        px = int((cx_world - e_min) * ppm)
+        py = int((n_max - cy_world) * ppm)  # flip Y for image coords
+
+        # Top-left of paste
+        x0 = px - diag // 2
+        y0 = py - diag // 2
+        x1 = x0 + diag
+        y1 = y0 + diag
+
+        # Clip to canvas
+        cx0 = max(0, x0)
+        cy0 = max(0, y0)
+        cx1 = min(canvas_w, x1)
+        cy1 = min(canvas_h, y1)
+        if cx1 <= cx0 or cy1 <= cy0:
+            skipped += 1
+            continue
+
+        sx0 = cx0 - x0
+        sy0 = cy0 - y0
+        sx1 = sx0 + (cx1 - cx0)
+        sy1 = sy0 + (cy1 - cy0)
+
+        sub_img = rotated[sy0:sy1, sx0:sx1].astype(np.float32)
+        sub_mask = rotated_mask[sy0:sy1, sx0:sx1] > 0
+
+        # Running mean: add to acc + increment cnt only where mask
+        acc[cy0:cy1, cx0:cx1][sub_mask] += sub_img[sub_mask]
+        cnt[cy0:cy1, cx0:cx1][sub_mask] += 1
+        placed += 1
+
+        if (i + 1) % 100 == 0:
+            print(f"  ... {i+1}/{len(traj)} placed={placed} skipped={skipped}")
+
+    # Finalize: divide by count
+    out = np.zeros_like(acc, dtype=np.uint8)
+    valid = cnt > 0
+    out[valid] = (acc[valid] / cnt[valid, None]).astype(np.uint8)
+
+    # Draw trajectory polyline (thin blue)
+    pts = []
+    for r in traj:
+        px = int((r["east"] - e_min) * ppm)
+        py = int((n_max - r["north"]) * ppm)
+        pts.append((px, py))
+    for i in range(1, len(pts)):
+        cv2.line(out, pts[i - 1], pts[i], (255, 200, 0), 1, cv2.LINE_AA)
+    # Mark start (green) and end (red)
+    if pts:
+        cv2.circle(out, pts[0], 8, (0, 255, 0), 2)
+        cv2.circle(out, pts[-1], 8, (0, 0, 255), 2)
+
+    cv2.imwrite(args.out, out)
+    dt = time.time() - t0
+    print(f"[done] placed={placed} skipped={skipped} canvas={canvas_w}x{canvas_h} time={dt:.1f}s out={args.out}")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/poses_to_csv.py
+++ b/scripts/poses_to_csv.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Extract camera positions (timestamp, x, y, z) from lingbot-map NPZ poses.
+Usage: poses_to_csv.py <input.npz> [--start_iso 2026-05-05T08:34:01] [--fps 1.0] > output.csv
+"""
+import argparse, sys
+import numpy as np
+from datetime import datetime, timezone
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('npz')
+    ap.add_argument('--start_iso', default=None, help='ISO timestamp of frame 0 (e.g. 2026-05-05T08:34:01)')
+    ap.add_argument('--fps', type=float, default=1.0, help='frames per second (default 1.0)')
+    ap.add_argument('--label', default='', help='segment label for CSV col')
+    args = ap.parse_args()
+    
+    data = np.load(args.npz, allow_pickle=True)
+    keys = list(data.keys())
+    # auto-detect poses key
+    poses = None
+    for k in ['poses', 'extrinsics', 'cam_poses', 'c2w']:
+        if k in keys:
+            poses = data[k]; break
+    if poses is None:
+        # take first 3D array
+        for k in keys:
+            arr = data[k]
+            if arr.ndim == 3 and arr.shape[-2:] in [(3,4),(4,4)]:
+                poses = arr; break
+    if poses is None:
+        sys.exit(f'No poses found in {args.npz} (keys: {keys})')
+    
+    # start timestamp
+    if args.start_iso:
+        try:
+            t0 = datetime.fromisoformat(args.start_iso).replace(tzinfo=timezone.utc).timestamp()
+        except Exception:
+            t0 = 0.0
+    elif 'start_ns' in keys:
+        t0 = float(data['start_ns']) / 1e9
+    else:
+        t0 = 0.0
+    
+    fps = float(args.fps)
+    if 'fps' in keys:
+        try: fps = float(data['fps'])
+        except: pass
+    
+    print('segment,frame_idx,timestamp_s,x,y,z')
+    for i, P in enumerate(poses):
+        if P.shape == (4,4):
+            P = P[:3]
+        R, t = P[:, :3], P[:, 3]
+        # extrinsic = world→cam ; cam position world = -R^T t
+        # but lingbot might save c2w directly; check determinant heuristic
+        pos = -R.T @ t  # if extrinsic
+        ts = t0 + i / fps
+        print(f'{args.label},{i},{ts:.6f},{pos[0]:.6f},{pos[1]:.6f},{pos[2]:.6f}')
+
+if __name__ == '__main__':
+    main()
--- a/scripts/stage06_align_absolute.py
+++ b/scripts/stage06_align_absolute.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""Stage 06 — absolute alignment of lingbot relative trajectory using MCAP IMU+USBL.
+Input  : trajectory CSV (segment, frame_idx, timestamp_s, x, y, z) + MCAP bag dir
+Output : absolute trajectory CSV (timestamp_s, lat, lon, alt, east_m, north_m, up_m, segment, lingbot_x, lingbot_y, lingbot_z)
+ plot PNG with absolute trajectory (spiral expected for lawnmower)
+
+Method:
+  - Parse MCAP : /mavros/imu/data + /mavros/global_position/global + /mavros/imu/static_pressure
+  - Convert lat/lon → ENU meters (origin = first fix)
+  - For each frame timestamp, find nearest GPS/IMU
+  - Umeyama similarity transform : align lingbot (x,y,z) → (east,north,depth)
+  - Output enhanced CSV + plot
+
+Usage:
+  python3 stage06_align_absolute.py --traj /tmp/auv213_full_trajectory.csv \
+      --mcap-dir /mnt/ssd/20260505-Lepradet/raw_data/logs/SUB/bag/20260505_150717_AUV013/ \
+      --out /tmp/auv213_absolute.csv --plot /tmp/auv213_absolute.png
+"""
+import argparse, csv, glob, os, sys, math
+import numpy as np
+from pathlib import Path
+from datetime import datetime, timezone
+
+def parse_mcap_bag(bag_dir):
+    """Return dict of {topic: [(ts_ns, msg_dict)]}."""
+    from rosbags.highlevel import AnyReader
+    bag_paths = sorted(Path(bag_dir).glob('*.mcap'))
+    if not bag_paths:
+        sys.exit(f'No .mcap in {bag_dir}')
+    print(f'[mcap] reading {len(bag_paths)} bag files', flush=True)
+    topics_of_interest = {
+        '/mavros/imu/data': 'Imu',
+        '/mavros/global_position/global': 'NavSatFix',
+        '/mavros/imu/static_pressure': 'FluidPressure',
+    }
+    data = {t: [] for t in topics_of_interest}
+    with AnyReader(bag_paths) as reader:
+        conns = [c for c in reader.connections if c.topic in topics_of_interest]
+        print(f'[mcap] connections: {[(c.topic, c.msgtype) for c in conns]}', flush=True)
+        for conn, ts_ns, raw in reader.messages(connections=conns):
+            try:
+                m = reader.deserialize(raw, conn.msgtype)
+                if conn.topic == '/mavros/imu/data':
+                    q = m.orientation
+                    data[conn.topic].append((ts_ns, {'qx': q.x, 'qy': q.y, 'qz': q.z, 'qw': q.w}))
+                elif conn.topic == '/mavros/global_position/global':
+                    if not (math.isnan(m.latitude) or math.isnan(m.longitude)):
+                        data[conn.topic].append((ts_ns, {'lat': m.latitude, 'lon': m.longitude, 'alt': m.altitude}))
+                elif conn.topic == '/mavros/imu/static_pressure':
+                    data[conn.topic].append((ts_ns, {'pressure_pa': m.fluid_pressure}))
+            except Exception as e:
+                continue
+    for t in data:
+        print(f'[mcap] {t}: {len(data[t])} samples', flush=True)
+    return data
+
+def latlon_to_enu(lat, lon, alt, lat0, lon0, alt0):
+    """Local ENU meters (flat Earth around (lat0, lon0))."""
+    R = 6378137.0
+    dlat = math.radians(lat - lat0)
+    dlon = math.radians(lon - lon0)
+    east = R * dlon * math.cos(math.radians(lat0))
+    north = R * dlat
+    up = alt - alt0
+    return east, north, up
+
+def nearest_ts(arr, ts_target_ns):
+    if not arr: return None
+    idx = np.searchsorted([a[0] for a in arr], ts_target_ns)
+    if idx == 0: return arr[0]
+    if idx >= len(arr): return arr[-1]
+    if abs(arr[idx][0] - ts_target_ns) < abs(arr[idx-1][0] - ts_target_ns):
+        return arr[idx]
+    return arr[idx-1]
+
+def umeyama(src, dst):
+    """Closed-form similarity transform (scale s, rotation R, translation t) minimizing ||s*R*src + t - dst||^2.
+    src, dst: (N, 3) arrays."""
+    src = np.asarray(src, dtype=np.float64)
+    dst = np.asarray(dst, dtype=np.float64)
+    mu_src = src.mean(axis=0)
+    mu_dst = dst.mean(axis=0)
+    src_c = src - mu_src
+    dst_c = dst - mu_dst
+    sigma_src = (src_c ** 2).sum() / len(src)
+    cov = (dst_c.T @ src_c) / len(src)
+    U, S, Vt = np.linalg.svd(cov)
+    D = np.eye(3)
+    if np.linalg.det(U @ Vt) < 0:
+        D[2, 2] = -1
+    R = U @ D @ Vt
+    s = (S * np.diag(D)).sum() / sigma_src
+    t = mu_dst - s * R @ mu_src
+    return s, R, t
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--traj', required=True)
+    ap.add_argument('--mcap-dir', required=True)
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--min-fixes', type=int, default=10, help='min GPS fixes to attempt Umeyama')
+    args = ap.parse_args()
+    
+    print(f'[traj] reading {args.traj}', flush=True)
+    rows = []
+    with open(args.traj) as f:
+        reader = csv.DictReader(f)
+        for r in reader:
+            rows.append({
+                'segment': r['segment'],
+                'frame_idx': int(r['frame_idx']),
+                'ts_s': float(r['timestamp_s']),
+                'x': float(r['x']),
+                'y': float(r['y']),
+                'z': float(r['z']),
+            })
+    print(f'[traj] {len(rows)} rows', flush=True)
+    
+    data = parse_mcap_bag(args.mcap_dir)
+    gps = data['/mavros/global_position/global']
+    imu = data['/mavros/imu/data']
+    press = data['/mavros/imu/static_pressure']
+    
+    if len(gps) < args.min_fixes:
+        print(f'[warn] only {len(gps)} GPS fixes (need >= {args.min_fixes}), skipping Umeyama. Will output raw + IMU only.', flush=True)
+    
+    # ENU origin = first GPS fix
+    if gps:
+        lat0 = gps[0][1]['lat']; lon0 = gps[0][1]['lon']; alt0 = gps[0][1]['alt']
+        print(f'[origin] lat0={lat0:.6f} lon0={lon0:.6f} alt0={alt0:.2f}', flush=True)
+    else:
+        lat0 = lon0 = alt0 = 0.0
+    
+    # Build per-frame absolute ENU using nearest GPS
+    src_lingbot, dst_enu, gps_per_frame, imu_per_frame, depth_per_frame = [], [], [], [], []
+    for r in rows:
+        ts_ns = int(r['ts_s'] * 1e9)
+        g = nearest_ts(gps, ts_ns)
+        i = nearest_ts(imu, ts_ns)
+        p = nearest_ts(press, ts_ns)
+        # Always store IMU+depth
+        imu_per_frame.append(i[1] if i else None)
+        if p:
+            depth_m = (p[1]['pressure_pa'] - 101325.0) / (1025.0 * 9.81)
+            depth_per_frame.append(depth_m)
+        else:
+            depth_per_frame.append(None)
+        gps_per_frame.append(g[1] if g else None)
+        if g and abs(g[0] - ts_ns) < 2e9:  # GPS within 2s
+            e, n, u = latlon_to_enu(g[1]['lat'], g[1]['lon'], g[1]['alt'], lat0, lon0, alt0)
+            src_lingbot.append([r['x'], r['y'], r['z']])
+            dst_enu.append([e, n, u])
+    
+    # Umeyama
+    if len(src_lingbot) >= args.min_fixes:
+        s, R, t = umeyama(src_lingbot, dst_enu)
+        print(f'[umeyama] scale={s:.4f} translation={t}', flush=True)
+        print(f'[umeyama] rotation_matrix=\n{R}', flush=True)
+    else:
+        s, R, t = 1.0, np.eye(3), np.zeros(3)
+        print(f'[umeyama] insufficient pairs, identity transform', flush=True)
+    
+    # Apply transform
+    out_rows = []
+    for i_row, r in enumerate(rows):
+        p_lingbot = np.array([r['x'], r['y'], r['z']])
+        p_abs = s * R @ p_lingbot + t
+        g = gps_per_frame[i_row]
+        im = imu_per_frame[i_row]
+        d = depth_per_frame[i_row]
+        out_rows.append({
+            'segment': r['segment'],
+            'frame_idx': r['frame_idx'],
+            'timestamp_s': r['ts_s'],
+            'east_m': p_abs[0],
+            'north_m': p_abs[1],
+            'up_m': p_abs[2],
+            'depth_m': d if d is not None else '',
+            'gps_lat': g['lat'] if g else '',
+            'gps_lon': g['lon'] if g else '',
+            'imu_qw': im['qw'] if im else '',
+            'imu_qx': im['qx'] if im else '',
+            'imu_qy': im['qy'] if im else '',
+            'imu_qz': im['qz'] if im else '',
+            'lingbot_x': r['x'],
+            'lingbot_y': r['y'],
+            'lingbot_z': r['z'],
+        })
+    
+    with open(args.out, 'w', newline='') as f:
+        w = csv.DictWriter(f, fieldnames=list(out_rows[0].keys()))
+        w.writeheader(); w.writerows(out_rows)
+    print(f'[out] {args.out} {len(out_rows)} rows', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_xz, ax_yz, ax_dt = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        colors = {'GX020030':'tab:red','GX030030':'tab:blue','GX040030':'tab:green','GX050030':'tab:orange'}
+        by_seg = {}
+        for r in out_rows:
+            by_seg.setdefault(r['segment'], []).append(r)
+        # plot absolute trajectory
+        for seg, rs in by_seg.items():
+            e=[r['east_m'] for r in rs]; n=[r['north_m'] for r in rs]; u=[r['up_m'] for r in rs]; d=[r['depth_m'] if r['depth_m']!='' else 0 for r in rs]
+            c = colors.get(seg, 'gray')
+            ax_xy.plot(e, n, '-', color=c, label=seg, alpha=0.7, linewidth=1)
+            ax_xy.plot(e[0], n[0], 'o', color=c, markersize=8)
+            ax_xz.plot(e, u, '-', color=c, alpha=0.7, linewidth=1)
+            ax_yz.plot(n, u, '-', color=c, alpha=0.7, linewidth=1)
+            t0 = rs[0]['timestamp_s']
+            ax_dt.plot([(r['timestamp_s']-t0)/60 for r in rs], d, '-', color=c, alpha=0.8, linewidth=1, label=seg)
+        # also plot GPS fixes
+        gps_e = [latlon_to_enu(g[1]['lat'], g[1]['lon'], g[1]['alt'], lat0, lon0, alt0)[0] for g in gps] if gps else []
+        gps_n = [latlon_to_enu(g[1]['lat'], g[1]['lon'], g[1]['alt'], lat0, lon0, alt0)[1] for g in gps] if gps else []
+        if gps_e:
+            ax_xy.plot(gps_e, gps_n, 'k.', markersize=2, alpha=0.5, label='GPS fixes')
+        ax_xy.set_xlabel('East (m)'); ax_xy.set_ylabel('North (m)'); ax_xy.set_title('Top view ENU')
+        ax_xy.set_aspect('equal'); ax_xy.legend(loc='best', fontsize=8); ax_xy.grid(True, alpha=0.3)
+        ax_xz.set_xlabel('East (m)'); ax_xz.set_ylabel('Up (m)'); ax_xz.set_title('Side East-Up'); ax_xz.set_aspect('equal'); ax_xz.grid(True, alpha=0.3)
+        ax_yz.set_xlabel('North (m)'); ax_yz.set_ylabel('Up (m)'); ax_yz.set_title('Side North-Up'); ax_yz.set_aspect('equal'); ax_yz.grid(True, alpha=0.3)
+        ax_dt.set_xlabel('Time (min)'); ax_dt.set_ylabel('Depth (m, from pressure)'); ax_dt.set_title('Depth over time (MCAP pressure)'); ax_dt.grid(True, alpha=0.3); ax_dt.legend(loc='best', fontsize=8); ax_dt.invert_yaxis()
+        fig.suptitle('AUV213 trajectory — absolute (Umeyama lingbot → ENU)')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+if __name__ == '__main__':
+    main()
--- a/scripts/stage06b_imu_depth_align.py
+++ b/scripts/stage06b_imu_depth_align.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+"""Stage 06b — IMU gravity + depth alignment with arbitrary origin.
+
+Method (no GPS/USBL):
+  1. Read MCAP for /mavros/imu/data (orientation) + /mavros/imu/static_pressure (depth)
+  2. For each frame ts, find nearest IMU+depth
+  3. Compute rotation = inverse of IMU body-to-world quaternion at frame 0
+     → rotates lingbot frame so World Up is +Z
+  4. Scale Z so range matches real depth (pressure-derived)
+  5. Place origin at (east0, north0, 0) arbitrary
+  6. Output CSV + plot trajectory in local ENU (origin = user-given coords)
+
+Usage:
+  python3 stage06b_imu_depth_align.py \
+    --traj /tmp/auv213_full_trajectory.csv \
+    --mcap-dir /mnt/ssd/.../20260505_150717_AUV013/ \
+    --east0 1000 --north0 5000 \
+    --out /tmp/auv213_aligned.csv --plot /tmp/auv213_aligned.png
+"""
+import argparse, csv, math, sys
+import numpy as np
+from pathlib import Path
+
+def quat_to_rot(qw, qx, qy, qz):
+    """Quaternion to 3x3 rotation matrix (body→world for mavros convention)."""
+    n = math.sqrt(qw*qw + qx*qx + qy*qy + qz*qz)
+    if n < 1e-9: return np.eye(3)
+    qw, qx, qy, qz = qw/n, qx/n, qy/n, qz/n
+    return np.array([
+        [1 - 2*(qy*qy + qz*qz), 2*(qx*qy - qz*qw),     2*(qx*qz + qy*qw)],
+        [2*(qx*qy + qz*qw),     1 - 2*(qx*qx + qz*qz), 2*(qy*qz - qx*qw)],
+        [2*(qx*qz - qy*qw),     2*(qy*qz + qx*qw),     1 - 2*(qx*qx + qy*qy)],
+    ])
+
+def parse_mcap(bag_dir):
+    from rosbags.highlevel import AnyReader
+    bags = sorted(Path(bag_dir).glob('*.mcap'))
+    data = {'imu': [], 'press': []}
+    with AnyReader(bags) as reader:
+        for conn, ts_ns, raw in reader.messages(connections=[c for c in reader.connections if c.topic in ['/mavros/imu/data','/mavros/imu/static_pressure']]):
+            m = reader.deserialize(raw, conn.msgtype)
+            if conn.topic == '/mavros/imu/data':
+                q = m.orientation
+                data['imu'].append((ts_ns, [q.w, q.x, q.y, q.z]))
+            else:
+                data['press'].append((ts_ns, m.fluid_pressure))
+    data['imu'].sort(); data['press'].sort()
+    return data
+
+def nearest(arr, ts_ns):
+    if not arr: return None
+    ks = [a[0] for a in arr]
+    idx = np.searchsorted(ks, ts_ns)
+    if idx == 0: return arr[0]
+    if idx >= len(arr): return arr[-1]
+    return arr[idx] if abs(arr[idx][0]-ts_ns) < abs(arr[idx-1][0]-ts_ns) else arr[idx-1]
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--traj', required=True)
+    ap.add_argument('--mcap-dir', required=True)
+    ap.add_argument('--east0', type=float, default=1000.0)
+    ap.add_argument('--north0', type=float, default=5000.0)
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    args = ap.parse_args()
+
+    rows = []
+    with open(args.traj) as f:
+        for r in csv.DictReader(f):
+            rows.append({'segment': r['segment'], 'frame_idx': int(r['frame_idx']),
+                'ts_s': float(r['timestamp_s']), 'p': np.array([float(r['x']), float(r['y']), float(r['z'])])})
+    print(f'[traj] {len(rows)} rows', flush=True)
+
+    data = parse_mcap(args.mcap_dir)
+    print(f'[mcap] imu={len(data["imu"])} press={len(data["press"])}', flush=True)
+
+    # 1. Gravity alignment using IMU at first frame ts
+    ts0_ns = int(rows[0]['ts_s'] * 1e9)
+    imu0 = nearest(data['imu'], ts0_ns)
+    if imu0 is None:
+        print('[err] no IMU sample', flush=True); sys.exit(1)
+    R_body2world = quat_to_rot(*imu0[1])
+    # Camera looks down on AUV → cam optical axis = body -Z. So cam Z (depth) in world = -R_body2world @ [0,0,1] = up direction inverted
+    # In lingbot frame, frame 0 sets identity. So lingbot_R_world = R_body2world
+    # World coords from lingbot coords: p_world = R_body2world @ p_lingbot
+    # But this assumes lingbot world axes coincide with body frame at t=0
+    R_world = R_body2world  # rough first-order approximation
+    print(f'[gravity] IMU q0 = {imu0[1]} -> R_body2world =', flush=True)
+    print(R_world, flush=True)
+
+    # 2. Depth scale: match lingbot Z range to real depth range from pressure
+    depths = []
+    for r in rows:
+        ts_ns = int(r['ts_s']*1e9)
+        p = nearest(data['press'], ts_ns)
+        if p:
+            d = (p[1] - 101325.0) / (1025.0 * 9.81)
+            depths.append(d)
+        else:
+            depths.append(None)
+    valid_depths = [d for d in depths if d is not None and d > 0.1]
+    if valid_depths:
+        depth_min, depth_max = min(valid_depths), max(valid_depths)
+        depth_range = depth_max - depth_min
+        print(f'[depth] real range: {depth_min:.2f}m to {depth_max:.2f}m ({depth_range:.2f}m)', flush=True)
+    else:
+        depth_min = depth_max = depth_range = 0
+        print('[depth] no valid pressure data', flush=True)
+
+    # apply rotation (no scaling yet)
+    rotated = np.array([R_world @ r['p'] for r in rows])
+    z_min, z_max = rotated[:, 2].min(), rotated[:, 2].max()
+    z_range = z_max - z_min
+    print(f'[lingbot after rot] Z range: {z_min:.2f} to {z_max:.2f} ({z_range:.2f}m)', flush=True)
+
+    # scale to match depth range (if both available and meaningful)
+    if depth_range > 0.5 and z_range > 0.1:
+        scale_z = depth_range / z_range
+        print(f'[scale_z] {scale_z:.3f}', flush=True)
+    else:
+        scale_z = 1.0
+        print(f'[scale_z] 1.0 (insufficient data)', flush=True)
+
+    # Apply isotropic scale (since the camera tracking is consistent in 3 axes)
+    rotated_scaled = rotated * scale_z
+
+    # Translate origin to user-given (east, north, 0)
+    east0, north0 = args.east0, args.north0
+    final = rotated_scaled.copy()
+    final[:, 0] += east0 - rotated_scaled[0, 0]
+    final[:, 1] += north0 - rotated_scaled[0, 1]
+    # Z aligned to depth_min if available else first frame
+    if valid_depths:
+        z_offset = -depth_min - final[0, 2]  # depth is positive down ; up = -depth
+        final[:, 2] += z_offset
+
+    with open(args.out, 'w', newline='') as f:
+        w = csv.writer(f)
+        w.writerow(['segment','frame_idx','timestamp_s','east_m','north_m','up_m','depth_real_m','lingbot_x','lingbot_y','lingbot_z'])
+        for i, r in enumerate(rows):
+            w.writerow([r['segment'], r['frame_idx'], f"{r['ts_s']:.6f}",
+                f"{final[i,0]:.4f}", f"{final[i,1]:.4f}", f"{final[i,2]:.4f}",
+                f"{depths[i]:.3f}" if depths[i] is not None else '',
+                f"{r['p'][0]:.4f}", f"{r['p'][1]:.4f}", f"{r['p'][2]:.4f}"])
+    print(f'[out] {args.out}', flush=True)
+
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_xz, ax_yz, ax_d = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        colors = {'GX020030':'tab:red','GX030030':'tab:blue','GX040030':'tab:green','GX050030':'tab:orange'}
+        by_seg = {}
+        for i, r in enumerate(rows):
+            by_seg.setdefault(r['segment'], []).append((final[i], depths[i], r['ts_s']))
+        t0 = rows[0]['ts_s']
+        for seg, lst in by_seg.items():
+            e = [v[0][0] for v in lst]; n = [v[0][1] for v in lst]; u = [v[0][2] for v in lst]
+            d_real = [v[1] if v[1] is not None else 0 for v in lst]
+            ts = [(v[2]-t0)/60 for v in lst]
+            c = colors.get(seg, 'gray')
+            ax_xy.plot(e, n, '-', color=c, label=f'{seg} ({len(lst)})', linewidth=1.2, alpha=0.8)
+            ax_xy.plot(e[0], n[0], 'o', color=c, markersize=8)
+            ax_xz.plot(e, u, '-', color=c, linewidth=1.2, alpha=0.8)
+            ax_yz.plot(n, u, '-', color=c, linewidth=1.2, alpha=0.8)
+            ax_d.plot(ts, d_real, '-', color=c, linewidth=1.2, alpha=0.8, label=seg)
+        ax_xy.set_xlabel('East (m, origin=1000)'); ax_xy.set_ylabel('North (m, origin=5000)')
+        ax_xy.set_title('Top view ENU — gravity+depth aligned'); ax_xy.set_aspect('equal'); ax_xy.legend(fontsize=8); ax_xy.grid(True, alpha=0.3)
+        ax_xz.set_xlabel('East (m)'); ax_xz.set_ylabel('Up (m)'); ax_xz.set_title('East-Up'); ax_xz.set_aspect('equal'); ax_xz.grid(True, alpha=0.3)
+        ax_yz.set_xlabel('North (m)'); ax_yz.set_ylabel('Up (m)'); ax_yz.set_title('North-Up'); ax_yz.set_aspect('equal'); ax_yz.grid(True, alpha=0.3)
+        ax_d.set_xlabel('Time (min)'); ax_d.set_ylabel('Depth real (m, from pressure)'); ax_d.set_title('Real depth from FluidPressure'); ax_d.legend(fontsize=8); ax_d.grid(True, alpha=0.3); ax_d.invert_yaxis()
+        fig.suptitle('AUV213 — gravity+depth aligned (origin 1000,5000 ; from IMU q0 + pressure scale)')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}', flush=True)
+
+if __name__ == '__main__': main()
--- a/scripts/viser_auv.py
+++ b/scripts/viser_auv.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+"""Open viser viewer with all PLYs from one AUV.
+
+Usage:
+    viser_auv.py --ply-dir /path/to/auv/ply --port 9210
+"""
+from __future__ import annotations
+
+import argparse
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ply-dir", required=True)
+    ap.add_argument("--port", type=int, default=9210)
+    ap.add_argument("--point-size", type=float, default=0.01)
+    ap.add_argument("--max-points-per-ply", type=int, default=1_500_000)
+    args = ap.parse_args()
+
+    try:
+        import open3d as o3d
+        import viser
+    except ImportError as e:
+        sys.exit(f"missing dep: {e}")
+
+    ply_dir = Path(args.ply_dir)
+    plys = sorted(ply_dir.glob("**/*.ply"))
+    print(f"Found {len(plys)} PLY files in {ply_dir}", flush=True)
+    if not plys:
+        sys.exit("no PLY found")
+
+    server = viser.ViserServer(host="0.0.0.0", port=args.port)
+    palette = [
+        (1.0, 0.30, 0.30), (0.30, 1.0, 0.30), (0.30, 0.55, 1.0),
+        (1.0, 0.85, 0.20), (1.0, 0.30, 1.0), (0.30, 1.0, 1.0),
+        (1.0, 0.55, 0.20), (0.55, 0.30, 1.0),
+    ]
+
+    for i, p in enumerate(plys):
+        pcd = o3d.io.read_point_cloud(str(p))
+        pts = np.asarray(pcd.points, dtype=np.float32)
+        if len(pts) == 0:
+            print(f"  ! {p.name}: empty", flush=True)
+            continue
+        if pcd.has_colors():
+            cols = np.asarray(pcd.colors, dtype=np.float32)
+        else:
+            cols = np.tile(palette[i % len(palette)], (len(pts), 1)).astype(np.float32)
+        if len(pts) > args.max_points_per_ply:
+            idx = np.random.choice(len(pts), args.max_points_per_ply, replace=False)
+            pts = pts[idx]
+            cols = cols[idx]
+        # viser wants uint8 colors
+        cols_u8 = (cols * 255).clip(0, 255).astype(np.uint8)
+        name = f"/{p.parent.name}_{p.stem}"
+        server.scene.add_point_cloud(
+            name=name, points=pts, colors=cols_u8, point_size=args.point_size
+        )
+        print(f"  + {p.name}: {len(pts):,} pts", flush=True)
+
+    print(f"Viser ready on port {args.port}", flush=True)
+    while True:
+        time.sleep(60)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/vo_classic_opencv.py
+++ b/scripts/vo_classic_opencv.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python3
+"""Classic Visual Odometry with OpenCV — lightweight CPU.
+Pipeline:
+  1. ORB features per frame + match consecutive (or KLT track keypoints)
+  2. Filter outliers via cv2.findEssentialMat RANSAC
+  3. cv2.recoverPose → R, t up-to-scale per pair
+  4. Concatenate to global pose chain
+  5. Output CSV (frame_idx, ts_s, x, y, z, qw, qx, qy, qz)
+
+Usage:
+  python3 vo_classic_opencv.py --frames-dir /home/cosma/cosma-pipeline/data/<mission>/frames/<AUV>/<SEGMENT> \
+      --start-iso 2026-05-05T08:33:41 --fps 1.0 --label GX039839 --out /tmp/vo_classic.csv \
+      --plot /tmp/vo_classic.png
+"""
+import argparse, csv, sys, math
+from pathlib import Path
+import numpy as np
+import cv2
+from datetime import datetime
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--frames-dir', required=True)
+    ap.add_argument('--start-iso', default='2026-05-05T00:00:00')
+    ap.add_argument('--fps', type=float, default=1.0)
+    ap.add_argument('--label', default='segment')
+    ap.add_argument('--out', required=True)
+    ap.add_argument('--plot', default=None)
+    ap.add_argument('--ref-csv', default=None, help='lingbot CSV to compare against (same segment)')
+    ap.add_argument('--max-features', type=int, default=2000)
+    ap.add_argument('--method', choices=['orb','klt'], default='klt')
+    args = ap.parse_args()
+    
+    frames = sorted(Path(args.frames_dir).glob('frame_*.jpg'))
+    if not frames:
+        sys.exit(f'no frames in {args.frames_dir}')
+    print(f'[vo] {len(frames)} frames in {args.frames_dir}', flush=True)
+    
+    # camera intrinsics for 518x294 GoPro wide @ ~122° hFOV
+    W, H = 518, 294
+    # focal estimate from FOV
+    fov_h_deg = 122.0
+    f = (W / 2.0) / math.tan(math.radians(fov_h_deg / 2.0))
+    cx, cy = W/2, H/2
+    K = np.array([[f, 0, cx], [0, f, cy], [0, 0, 1]], dtype=np.float64)
+    print(f'[vo] K = focal={f:.1f}, cx={cx}, cy={cy}', flush=True)
+    
+    # Init pose
+    R_world = np.eye(3)
+    t_world = np.zeros((3, 1))
+    
+    out_rows = []
+    out_rows.append({'label': args.label, 'frame_idx': 0, 'ts_s': datetime.fromisoformat(args.start_iso).timestamp(), 
+                     'x': 0.0, 'y': 0.0, 'z': 0.0, 'inliers': 0, 'tracked': 0})
+    
+    prev_gray = cv2.imread(str(frames[0]), cv2.IMREAD_GRAYSCALE)
+    if args.method == 'klt':
+        # Initial corners via goodFeaturesToTrack
+        prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=args.max_features, qualityLevel=0.01, minDistance=7, blockSize=7)
+    else:
+        orb = cv2.ORB_create(args.max_features)
+        prev_kp, prev_desc = orb.detectAndCompute(prev_gray, None)
+        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+    
+    t0 = datetime.fromisoformat(args.start_iso).timestamp()
+    fail_count = 0
+    
+    for i in range(1, len(frames)):
+        curr_gray = cv2.imread(str(frames[i]), cv2.IMREAD_GRAYSCALE)
+        if curr_gray is None: continue
+        
+        # 1. Match/track features
+        if args.method == 'klt':
+            if prev_pts is None or len(prev_pts) < 50:
+                prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=args.max_features, qualityLevel=0.01, minDistance=7, blockSize=7)
+            if prev_pts is None or len(prev_pts) < 50:
+                fail_count += 1
+                out_rows.append({'label': args.label, 'frame_idx': i, 'ts_s': t0 + i/args.fps, 
+                                 'x': t_world[0,0], 'y': t_world[1,0], 'z': t_world[2,0], 'inliers': 0, 'tracked': 0})
+                prev_gray = curr_gray
+                continue
+            curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None, winSize=(21,21), maxLevel=3)
+            good_prev = prev_pts[status.flatten() == 1]
+            good_curr = curr_pts[status.flatten() == 1]
+            n_tracked = len(good_prev)
+        else:  # orb
+            curr_kp, curr_desc = orb.detectAndCompute(curr_gray, None)
+            if prev_desc is None or curr_desc is None or len(curr_kp) < 50:
+                fail_count += 1
+                out_rows.append({'label': args.label, 'frame_idx': i, 'ts_s': t0 + i/args.fps, 
+                                 'x': t_world[0,0], 'y': t_world[1,0], 'z': t_world[2,0], 'inliers': 0, 'tracked': 0})
+                prev_gray = curr_gray; prev_kp = curr_kp; prev_desc = curr_desc
+                continue
+            matches = bf.match(prev_desc, curr_desc)
+            matches = sorted(matches, key=lambda m: m.distance)[:500]
+            good_prev = np.array([prev_kp[m.queryIdx].pt for m in matches], dtype=np.float32).reshape(-1, 1, 2)
+            good_curr = np.array([curr_kp[m.trainIdx].pt for m in matches], dtype=np.float32).reshape(-1, 1, 2)
+            n_tracked = len(matches)
+        
+        if n_tracked < 30:
+            fail_count += 1
+            out_rows.append({'label': args.label, 'frame_idx': i, 'ts_s': t0 + i/args.fps, 
+                             'x': t_world[0,0], 'y': t_world[1,0], 'z': t_world[2,0], 'inliers': 0, 'tracked': n_tracked})
+            prev_gray = curr_gray
+            if args.method == 'klt':
+                prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=args.max_features, qualityLevel=0.01, minDistance=7, blockSize=7)
+            else:
+                prev_kp, prev_desc = curr_kp, curr_desc
+            continue
+        
+        # 2. Essential matrix + recoverPose
+        try:
+            E, mask = cv2.findEssentialMat(good_curr.reshape(-1,2), good_prev.reshape(-1,2), K, method=cv2.RANSAC, prob=0.999, threshold=1.0)
+            if E is None or E.shape != (3,3):
+                raise ValueError('bad E')
+            _, R, t, mask_pose = cv2.recoverPose(E, good_curr.reshape(-1,2), good_prev.reshape(-1,2), K, mask=mask)
+            n_inliers = int(mask_pose.sum()) if mask_pose is not None else 0
+        except Exception as e:
+            fail_count += 1
+            out_rows.append({'label': args.label, 'frame_idx': i, 'ts_s': t0 + i/args.fps, 
+                             'x': t_world[0,0], 'y': t_world[1,0], 'z': t_world[2,0], 'inliers': 0, 'tracked': n_tracked})
+            prev_gray = curr_gray
+            if args.method == 'klt':
+                prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=args.max_features, qualityLevel=0.01, minDistance=7, blockSize=7)
+            else:
+                prev_kp, prev_desc = curr_kp, curr_desc
+            continue
+        
+        # 3. Update global pose : R_world = R_world @ R^T ; t_world = t_world - R_world @ t
+        # (camera convention: R maps prev to curr in cam frame, t is unit baseline)
+        # Pose update for VO:
+        t_world = t_world + R_world @ t
+        R_world = R_world @ R
+        
+        out_rows.append({'label': args.label, 'frame_idx': i, 'ts_s': t0 + i/args.fps, 
+                         'x': t_world[0,0], 'y': t_world[1,0], 'z': t_world[2,0], 
+                         'inliers': n_inliers, 'tracked': n_tracked})
+        
+        # carry forward
+        prev_gray = curr_gray
+        if args.method == 'klt':
+            prev_pts = cv2.goodFeaturesToTrack(curr_gray, maxCorners=args.max_features, qualityLevel=0.01, minDistance=7, blockSize=7)
+        else:
+            prev_kp, prev_desc = curr_kp, curr_desc
+        
+        if i % 100 == 0:
+            print(f'[vo] frame {i}/{len(frames)} tracked={n_tracked} inliers={n_inliers} pos=({t_world[0,0]:.2f},{t_world[1,0]:.2f},{t_world[2,0]:.2f})', flush=True)
+    
+    print(f'[vo] done. Total frames: {len(out_rows)}. Failed pairs: {fail_count}', flush=True)
+    
+    with open(args.out, 'w', newline='') as f:
+        w = csv.DictWriter(f, fieldnames=['label','frame_idx','ts_s','x','y','z','inliers','tracked'])
+        w.writeheader(); w.writerows(out_rows)
+    print(f'[out] {args.out}', flush=True)
+    
+    if args.plot:
+        import matplotlib
+        matplotlib.use('Agg')
+        import matplotlib.pyplot as plt
+        x = [r['x'] for r in out_rows]
+        y = [r['y'] for r in out_rows]
+        z = [r['z'] for r in out_rows]
+        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
+        ax_xy, ax_xz, ax_yz, ax_qual = axes[0,0], axes[0,1], axes[1,0], axes[1,1]
+        ax_xy.plot(x, y, '-b', linewidth=1, alpha=0.7, label='VO classic')
+        ax_xy.plot(x[0], y[0], 'go', markersize=10, label='start')
+        ax_xy.plot(x[-1], y[-1], 'r^', markersize=10, label='end')
+        if args.ref_csv:
+            try:
+                with open(args.ref_csv) as ff:
+                    refrows = list(csv.DictReader(ff))
+                rx = [float(r['x']) for r in refrows if r.get('segment','') == args.label]
+                ry = [float(r['y']) for r in refrows if r.get('segment','') == args.label]
+                if rx:
+                    ax_xy.plot(rx, ry, '-r', linewidth=1, alpha=0.5, label='lingbot')
+            except Exception as e:
+                print(f'[plot] ref_csv load fail: {e}')
+        ax_xy.set_xlabel('X (m, up-to-scale)'); ax_xy.set_ylabel('Y'); ax_xy.set_title(f'Top X-Y — VO classique {args.label}')
+        ax_xy.set_aspect('equal'); ax_xy.legend(); ax_xy.grid(True, alpha=0.3)
+        
+        ax_xz.plot(x, z, '-b', linewidth=1)
+        ax_xz.set_xlabel('X'); ax_xz.set_ylabel('Z'); ax_xz.set_title('Side X-Z'); ax_xz.set_aspect('equal'); ax_xz.grid(True, alpha=0.3)
+        ax_yz.plot(y, z, '-b', linewidth=1)
+        ax_yz.set_xlabel('Y'); ax_yz.set_ylabel('Z'); ax_yz.set_title('Side Y-Z'); ax_yz.set_aspect('equal'); ax_yz.grid(True, alpha=0.3)
+        
+        tracked = [r['tracked'] for r in out_rows]
+        inliers = [r['inliers'] for r in out_rows]
+        ax_qual.plot(tracked, label='tracked features', color='blue', alpha=0.6)
+        ax_qual.plot(inliers, label='RANSAC inliers', color='red', alpha=0.6)
+        ax_qual.set_xlabel('Frame'); ax_qual.set_ylabel('Count'); ax_qual.set_title('Tracking quality'); ax_qual.legend(); ax_qual.grid(True, alpha=0.3)
+        
+        plt.suptitle(f'Visual Odometry classique (OpenCV {args.method.upper()}) — {args.label}')
+        plt.tight_layout()
+        plt.savefig(args.plot, dpi=130, bbox_inches='tight')
+        print(f'[plot] {args.plot}')
+
+if __name__ == '__main__': main()
Author	SHA1	Message	Date
Poulpe	171f90ce9f	stage03b: trim videos per run + ours rough cut LRV proxy (GoPro low-res 768x432 H.264) + ffmpeg -c copy keyframe-aligned. Inputs: 02_runs.json + 03_video_index.json. Outputs: per-run mp4 + ours_<gp>.mp4 chrono concat. Tested on 20260505-Lepradet: 5 files + 2 ours (~11 GB total). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-16 16:05:41 +00:00
Ubuntu	754f3c7272	stage01: filtre old/, fusion AUV0xx<->AUV2xx, focus date mission - _is_old_path skip tout path contenant /old/ - _normalize_auv_id: AUV010->AUV210, AUV012->AUV212, AUV013->AUV213 - _parse_mission_date: regex YYYYMMDD du nom mission - _mission_date_window: filtre coverage hors [date-2h, date+26h] UTC - manifest: mission_date + mission_date_window_utc + n_filtered_old + n_filtered_out_of_date	2026-05-15 10:48:22 +00:00
Ubuntu	90621dea12	stage01: extraction timestamps internes + mission/auv windows - ffprobe SMPTE pour MP4 GoPro (priorité) - mcap.reader pour bag ROS2 - pymavlink pour BIN ArduSub (fallback mtime si fail) - head/tail CSV USBL et MAG - regex filename pour KLF Kogger - mission_window global + auv_windows avec gaps détection (>60s)	2026-05-15 10:10:01 +00:00
Ubuntu	15b4ddfd70	stage01: ajoute collectors MAG (cosmag csv ar/av/side) + SSS (Kogger klf/bin) Manifest 01 enrichi avec deux nouveaux champs: - sss_files: {klf,bin} relatifs sous raw_data/sss/ - mag_files: {ar,av,side,other} csv sous raw_data/mag/ Tests: - 20260505-Lepradet: sss=1 mag=0 (mag vide attendu) - 20260508-sttropez: sss=53 mag=73 (match scout) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-05-15 09:53:01 +00:00
Ubuntu	65bda7ff71	stage02: filtre strict v5 (pct=80 dur=60 depth=-3m) + stage02b diag plots Defaults plus stricts pour éliminer surface/yoyo: - min_near_bottom_pct: 50 -> 80 % - min_sustained_duration: 30 -> 60 s - min_mission_depth: -2 -> -3 m - min_displacement_m: 5.0 documenté (stage06+ futur) Nouveau script 02b_runs_diag.py: 4-panel PNG par run OK+rejected (rel_alt+threshold, MAVROS state, depth histo, verdict criteria) + index.html pour inspection visuelle Flag. Test Lepradet: 5 -> 1 run OK (AUV210_run_00 79s -13m 81pct) Page publiée: laboratoire.freeboxos.fr/02-runs-diag-lepradet/	2026-05-15 09:53:01 +00:00
Ubuntu	2858217897	stage02: filtre pre-water runs avant première submersion réelle - find_first_submersion_epoch(): premier rel_alt < -2m pendant >= 5s continu - detect_runs_state_based(): rejette runs avant first_sub_epoch, tronque chevauchants - CLI: --first-submersion-depth (default -2.0) + --first-submersion-duration (default 5.0) - JSON output: first_submersion_epoch + pre_water_rejected_count par AUV - Lepradet: AUV212 run_00 tronqué 25s, AUV213 run_00 tronqué 11s	2026-05-15 09:53:01 +00:00
Poulpe	568ff9469b	auto-iter 2026-05-14: iteration-log iter10 — RoPE fix + PR#13 merge + .83 blocker Co-Authored-By: Poulpe <claude@nowyouknow.fr>	2026-05-14 04:56:09 +00:00
Poulpe	2611a72aa2	auto-iter 2026-05-14: max_frame_num 1024→2048 fix RoPE overflow GX019817 Root cause: 3D RoPE precomputed for max_frame_num+100=1124 positions. GX019817 has 1357 frames after trim → index overflow → tensor mismatch. 2048 → supports up to 2148 frames (covers all current segments). Co-Authored-By: Poulpe <claude@nowyouknow.fr>	2026-05-14 04:52:20 +00:00
Poulpe (Gouvernance Gitea)	50ca77490d	Merge pull request 'fix: 05_inference viser-kill + background-poll + offload_to_cpu from yaml' (#13 ) from fix/05-inference-viser-kill-offload into feature/auto-pipeline	2026-05-14 04:47:10 +00:00
Poulpe	503d6d64c2	iter-9: veille + stage06 path analysis	2026-05-13 23:03:19 +00:00
Poulpe	38dbcfd46f	auto-iter 20260513-2231: GX019817 RoPE skip, 4 PLY done ready for stage06	2026-05-13 23:02:31 +00:00
Poulpe	091ffeb2f6	chore: iter-8 log + veille (2026-05-13)	2026-05-13 16:44:18 +00:00
Poulpe	13323f2edf	fix: 05_inference — kill stale demo.py + background poll exit viser + offload_to_cpu from yaml - kill_stale_demo_py() before each segment to prevent GPU contention from orphan processes - Remote script runs demo.py in background via nohup, polls for PLY file every 30s, kills viser server once PLY written — prevents indefinite SSH block on viser listener - offload_to_cpu now read from thresholds.yaml[inference] (default false for 24GB VRAM) - timeout reads inference_timeout_s from yaml (already 10800s) - min_frames guard included (from fix/05-inference-min-frames-timeout) Root cause: demo.py starts viser server after writing PLY; SSH timed out → orphan; two orphans competed for GPU with offload_to_cpu → pure CPU inference = 6h+ for 493 frames	2026-05-13 16:41:18 +00:00
Poulpe	c55700677e	auto-iter 2026-05-13: offload_to_cpu=false (.84 24GB VRAM, no CPU offload needed)	2026-05-13 16:39:51 +00:00
Poulpe	ba92d68492	chore: iter-7 veille + log (2026-05-13)	2026-05-13 10:42:37 +00:00
Poulpe	c7c4431e72	auto-iter 2026-05-13: inference min_frames=32 + timeout 3h (was 2h) - min_frames_for_inference: 32 (RoPE/attention needs ≥32 frames) - inference_timeout_s: 10800 (GX029818 timed out at 7200s with 493 frames) Authored-by: Poulpe <claude@nowyouknow.fr>	2026-05-13 10:36:28 +00:00
Poulpe	1f1502e67c	auto-iter 2026-05-12: log iter-5 + veille + merge PR#10 fix streaming params	2026-05-12 22:49:59 +00:00
Ubuntu	81752163d2	Merge branch 'fix/05-inference-yaml-params' into feature/auto-pipeline	2026-05-12 22:46:30 +00:00
Poulpe	c06dd774ac	auto-iter 2026-05-12: log iter-4 + veille	2026-05-12 16:43:05 +00:00
Poulpe	3a6b058f0d	fix: 05_inference.py lit thresholds.yaml[inference] au lieu de windowed hardcodé - Ajoute _load_inference_cfg() qui lit config/thresholds.yaml - Mode/conf/keyframe_interval/max_frame_num depuis config (streaming par défaut) - Valide par GX049839_v2: streaming+conf=1.5+kf=1 → 146M pts vs 0 pts en windowed sans conf_threshold - Ajoute --offload_to_cpu (stable sur RTX 3090 .84)	2026-05-12 16:38:33 +00:00
Poulpe	8880c28af9	auto-iter 2026-05-12: keyframe_interval 6→1 (streaming, validé GX049839_v2 146M pts)	2026-05-12 16:37:06 +00:00
Poulpe	df45fd155d	auto-iter 2026-05-12: bottom_visible_pct_min 30→25 (GX019817 29% récupérable)	2026-05-12 10:34:19 +00:00
Poulpe	f0154d7ea5	auto-iter 2026-05-12: log iter-2 + veille	2026-05-12 04:40:56 +00:00
Poulpe	8b826b0827	auto-iter 2026-05-12: fix duplicate frame_extract key in thresholds.yaml	2026-05-12 04:39:03 +00:00
Poulpe	4f54d58cd3	auto-iter 2026-05-11: iteration-log + veille iter-1	2026-05-11 22:34:42 +00:00
Poulpe	06d4aa5d4d	auto-iter 2026-05-11: bottom_visible_pct seuil 50→30 (avg=37.5%)	2026-05-11 22:33:12 +00:00
Poulpe	e09ef7886b	feat(pipeline): stage 04b port trim_above_water from dispatcher	2026-05-11 14:08:30 +00:00
Ubuntu	82f71fcc96	feat: frame QC scoring + viser per-AUV button Stage 04 frame extract: - New lib_frame_qc.py: per-frame Laplacian/contrast/blue-dominance scoring - Classes: bottom_visible / water_no_bottom / turbid_water / out_of_water - Sample 1/5 frames after extraction, write qc.json per segment - Record metrics (frames_total, frames_bottom_visible, bottom_visible_pct) - Mark job degraded when bottom_visible_pct < 50% Per-AUV viser view: - scripts/viser_auv.py loads all PLYs of an AUV, color per file - POST /pipeline/missions/{id}/auvs/{auv}/view rsyncs ply -> worker - launches viser on hashed port 9300+, returns URL - _pipeline.html exposes AUV list, JS handler opens viser tab	2026-05-11 11:05:37 +00:00
Ubuntu	1a4fffd2c1	feat: pipeline monitor + orchestrator stats dashboard	2026-05-11 10:55:44 +00:00
Ubuntu	e597407ee5	feat(pipeline): jalon 1-3 — ingest, USBL parse, filter Stages 01-03 opérationnels sur 20260505-Lepradet: - 01_ingest: manifest auto, 3 AUVs vidéo, 3 AUVs bags, mapping AUV2xx↔AUV0xx - 02_usbl_parse: MCAP (format incompatible firmware) → fallback serial CSV, 213 pts bruts - 03_usbl_filter: MAD-3σ + moving-avg + Kalman optionnel, dégradé gracieux si null lat/lon - orchestrator/db.py: SQLite schema missions/jobs/metrics idempotent - config/: thresholds.yaml + default_params.yaml versionnés - qa/checks.py: vérifications pass/fail/degraded par étape Note: MCAP bags corrompus ou format non-standard firmware — lat/lon absent. Statut degraded (pas crash). Nécessite investigation format MCAP spécifique.	2026-05-11 10:25:27 +00:00