feat: hook post-job cosma-nav + style dashboard + docker-compose update
This commit is contained in:
@@ -806,6 +806,43 @@ def run_one_stitch(stitch: sqlite3.Row):
|
||||
finished_at=_now_iso())
|
||||
|
||||
|
||||
|
||||
# Host value that _post_job_qc_sync compares against worker["host"]; the
# post-job steps are skipped for any other worker.
ML_STACK_HOST = "192.168.0.84"
|
||||
# Target name handed to ssh() as its first argument for every remote command
# issued by _post_job_qc_sync.
ML_STACK_ALIAS = "ml-stack"
|
||||
# Script path executed remotely as `python3 <path> <job_id> --frames-dir <dir>`.
_PRE_DECIMATE = "/root/cosma-nav/scripts/pre_decimate.py"
|
||||
# Script path executed remotely as `bash <path> <job_id>`.
_ARCHIVE_SH = "/root/cosma-nav/scripts/archive_job.sh"
|
||||
|
||||
|
||||
def _post_job_qc_sync(job_id: int, worker: dict, frames_dir: str):
    """Run the post-job QC steps on ml-stack: PLY decimation, then NAS archive.

    Meant to be started fire-and-forget after a job completed successfully.
    Nothing is returned; the outcome of each step is only printed.

    The steps are executed only when ``worker["host"]`` equals
    ``ML_STACK_HOST``, since that is where the helper scripts live. For any
    other worker a skip message is printed and no remote command is issued.

    Each remote step redirects its output to ``/tmp/<step>_<job_id>.log`` on
    the remote side. When a step exits non-zero, the last five lines of that
    log are fetched and at most their first 300 characters are printed. The
    archive step runs whether or not the decimation step succeeded.

    Args:
        job_id: Identifier of the finished job; passed to both scripts and
            used in the remote log file names.
        worker: Worker description; only its ``"host"`` entry is read.
        frames_dir: Frames directory of the job; its *parent* directory is
            what gets passed to the decimation script as ``--frames-dir``.
    """
    if worker["host"] != ML_STACK_HOST:
        print(f" post_job #{job_id}: worker={worker['host']} != ml-stack, skip QC sync", flush=True)
        return

    remote = ML_STACK_ALIAS
    # Shell-quote the path once so it is safe inside the remote command line.
    quoted_parent = shlex.quote(str(Path(frames_dir).parent))

    # --- Step 1: decimate the PLY -------------------------------------------
    decimate_cmd = (
        f"python3 {_PRE_DECIMATE} {job_id} "
        f"--frames-dir {quoted_parent} "
        f"> /tmp/pre_decimate_{job_id}.log 2>&1"
    )
    decimate_rc, _, _ = ssh(remote, decimate_cmd, timeout=600)
    if decimate_rc != 0:
        # Pull the end of the remote log so the failure reason shows up locally.
        log_tail = ssh(remote, f"tail -5 /tmp/pre_decimate_{job_id}.log")[1]
        print(f" post_job #{job_id}: pre_decimate FAIL: {log_tail[:300]}", flush=True)
    else:
        print(f" post_job #{job_id}: pre_decimate OK", flush=True)

    # --- Step 2: archive the job (attempted even if step 1 failed) -----------
    archive_cmd = f"bash {_ARCHIVE_SH} {job_id} > /tmp/archive_{job_id}.log 2>&1"
    archive_rc, _, _ = ssh(remote, archive_cmd, timeout=600)
    if archive_rc != 0:
        log_tail = ssh(remote, f"tail -5 /tmp/archive_{job_id}.log")[1]
        print(f" post_job #{job_id}: archive FAIL: {log_tail[:300]}", flush=True)
    else:
        print(f" post_job #{job_id}: archive OK", flush=True)
|
||||
|
||||
|
||||
def run_one(job: sqlite3.Row) -> bool:
|
||||
"""Returns True if a worker was picked and work started."""
|
||||
job_id = job["id"]
|
||||
@@ -825,6 +862,7 @@ def run_one(job: sqlite3.Row) -> bool:
|
||||
set_status(job_id, status="done", viser_url=viser_url, ply_path=ply_path,
|
||||
progress=100, log_tail=log, finished_at=_now_iso())
|
||||
_maybe_create_per_auv_stitch(job_id)
|
||||
threading.Thread(target=_post_job_qc_sync, args=(job_id, worker, frames_dir), daemon=True).start()
|
||||
except Exception as e:
|
||||
# do_extract raises "skipped_short" after flagging status='skipped' — don't override.
|
||||
if "skipped_short" not in str(e):
|
||||
|
||||
Reference in New Issue
Block a user