fix(05-inference): min_frames guard + configurable timeout #12

Open
poulpe wants to merge 1 commit from fix/05-inference-min-frames-timeout into feature/auto-pipeline
Showing only changes of commit 5ead87d59c - Show all commits

View File

@@ -195,9 +195,10 @@ def run_inference(frames_dir: Path, worker_key: str, mission_name: str,
print(f" [05] Launching inference on {host}...") print(f" [05] Launching inference on {host}...")
t0 = time.time() t0 = time.time()
inf_timeout = int(_INF_CFG.get("inference_timeout_s", 10800))
r = subprocess.run( r = subprocess.run(
["ssh", "-o", "StrictHostKeyChecking=no", ssh_target, demo_cmd], ["ssh", "-o", "StrictHostKeyChecking=no", ssh_target, demo_cmd],
capture_output=True, text=True, timeout=7200, # 2h max capture_output=True, text=True, timeout=inf_timeout,
) )
elapsed = time.time() - t0 elapsed = time.time() - t0
metrics["inference_s"] = round(elapsed, 1) metrics["inference_s"] = round(elapsed, 1)
@@ -265,6 +266,19 @@ def process_frames_dir(frames_dir: Path, worker_key: str, mission_name: str) ->
if not frames: if not frames:
continue continue
print(f"\n[05] === {auv_id}/{seg_dir.name}: {len(frames)} frames ===") print(f"\n[05] === {auv_id}/{seg_dir.name}: {len(frames)} frames ===")
# Guard: min frames required for model (RoPE/attention)
min_frames = int(_INF_CFG.get("min_frames_for_inference", 32))
if len(frames) < min_frames:
print(f" [05] SKIP {auv_id}/{seg_dir.name}: {len(frames)} frames < {min_frames} min")
init_db()
with get_conn() as conn_mf:
mr = conn_mf.execute("SELECT id FROM missions WHERE name=?", (mission_name,)).fetchone()
if mr:
upsert_job(conn_mf, mr["id"], auv_id, seg_dir.name, "05_inference",
status="skipped",
error_msg=f"frames_too_few={len(frames)}<{min_frames}")
continue
m = run_inference(seg_dir, worker_key, mission_name, auv_id, seg_dir.name) m = run_inference(seg_dir, worker_key, mission_name, auv_id, seg_dir.name)
all_metrics.append(m) all_metrics.append(m)