From 52cd09db1b1e10fab40c90cd381bc3118c4d7cca Mon Sep 17 00:00:00 2001 From: Poulpe Date: Wed, 13 May 2026 04:35:41 +0000 Subject: [PATCH] fix(05_inference): save error_msg in DB + skip stage04-degraded segments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - upsert_job now receives error_msg= on failure → debug possible - Guard before run_inference: skip segments with stage04=degraded, mark as skipped instead of attempting inference on bad frames - Addresses: 10 orphan error records from iter-5 batch (6 were degraded at 04, 4 had transient SSH failure with no trace in DB) Author: Poulpe --- pipeline/stages/05_inference.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pipeline/stages/05_inference.py b/pipeline/stages/05_inference.py index 05b6369..03cbae9 100644 --- a/pipeline/stages/05_inference.py +++ b/pipeline/stages/05_inference.py @@ -265,6 +265,26 @@ def process_frames_dir(frames_dir: Path, worker_key: str, mission_name: str) -> if not frames: continue print(f"\n[05] === {auv_id}/{seg_dir.name}: {len(frames)} frames ===") + + # Guard: skip if stage04 is degraded (no useful frames) + init_db() + with get_conn() as conn_check: + mission_row_check = conn_check.execute( + "SELECT id FROM missions WHERE name=?", (mission_name,) + ).fetchone() + if mission_row_check: + s04 = conn_check.execute( + "SELECT status FROM jobs WHERE mission_id=? AND auv_id=? " + "AND segment_label=? AND stage='04_frame_extract'", + (mission_row_check["id"], auv_id, seg_dir.name), + ).fetchone() + if s04 and s04["status"] == "degraded": + print(f" [05] SKIP {auv_id}/{seg_dir.name}: stage04=degraded") + upsert_job(conn_check, mission_row_check["id"], auv_id, seg_dir.name, + "05_inference", status="skipped", + error_msg="stage04=degraded, skipped") + continue + m = run_inference(seg_dir, worker_key, mission_name, auv_id, seg_dir.name) all_metrics.append(m) @@ -278,6 +298,7 @@ def process_frames_dir(frames_dir: Path, worker_key: str, mission_name: str) -> conn, mission_row["id"], auv_id, seg_dir.name, "05_inference", status="done" if m.get("status") == "ok" else m.get("status", "error"), output_path=m.get("ply", ""), + error_msg=m.get("error", "") if m.get("status") != "ok" else None, ) record_metric(conn, job_id, "ply_points", value=m.get("n_points", 0), pass_fail="pass" if m.get("n_points", 0) > 100 else "fail")