stitch pipeline câblé : DB + dispatcher + UI + fix subpath Caddy

- Table stitches (per_auv + cross_auv) avec cancel/retry API
- Dispatcher : PLY export auto (--save_ply), trigger stitch en cascade
  quand tous les jobs d'un AUV sont done
- UI : section stitch live depuis DB avec statuts/durées/boutons
- Fix : <base href="/cosma-qc/"> + chemins relatifs pour Caddy subpath
- open3d 0.19.0 installé sur gpu (.87)
- SSH key .82→.87 configurée, alias gpu ajouté sur .82

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Poulpe
2026-04-21 10:32:05 +00:00
parent 3b005a4994
commit 26e5bfc05b
5 changed files with 281 additions and 48 deletions

View File

@@ -102,6 +102,24 @@ def init_schema() -> None:
CREATE INDEX IF NOT EXISTS jobs_status_idx ON jobs(status); CREATE INDEX IF NOT EXISTS jobs_status_idx ON jobs(status);
CREATE INDEX IF NOT EXISTS jobs_acq_idx ON jobs(acquisition_id); CREATE INDEX IF NOT EXISTS jobs_acq_idx ON jobs(acquisition_id);
CREATE TABLE IF NOT EXISTS stitches (
id INTEGER PRIMARY KEY,
acquisition_id INTEGER NOT NULL REFERENCES acquisitions(id) ON DELETE CASCADE,
level TEXT NOT NULL DEFAULT 'per_auv',
auv TEXT,
input_job_ids TEXT NOT NULL DEFAULT '[]',
input_stitch_ids TEXT NOT NULL DEFAULT '[]',
output_ply TEXT,
status TEXT NOT NULL DEFAULT 'queued',
worker_host TEXT,
started_at TEXT,
finished_at TEXT,
error TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS stitches_acq_idx ON stitches(acquisition_id);
""") """)
@@ -124,6 +142,10 @@ def _build_acquisitions():
jobs = conn.execute( jobs = conn.execute(
"SELECT * FROM jobs ORDER BY auv, gopro_serial, segment_label" "SELECT * FROM jobs ORDER BY auv, gopro_serial, segment_label"
).fetchall() ).fetchall()
stitches = conn.execute(
"SELECT * FROM stitches ORDER BY level DESC, auv"
).fetchall()
by_acq: dict[int, list[dict]] = {} by_acq: dict[int, list[dict]] = {}
by_acq_total: dict[int, int] = {} by_acq_total: dict[int, int] = {}
for j in jobs: for j in jobs:
@@ -133,12 +155,30 @@ def _build_acquisitions():
by_acq.setdefault(j["acquisition_id"], []).append(d) by_acq.setdefault(j["acquisition_id"], []).append(d)
by_acq_total[j["acquisition_id"]] = by_acq_total.get(j["acquisition_id"], 0) + dur_s by_acq_total[j["acquisition_id"]] = by_acq_total.get(j["acquisition_id"], 0) + dur_s
stitches_by_acq: dict[int, list[dict]] = {}
for s in stitches:
d = dict(s)
start = _parse_ts(s["started_at"])
end = _parse_ts(s["finished_at"]) or (
datetime.now(timezone.utc) if s["status"] == "running" else None
)
if start and end:
if start.tzinfo is None:
start = start.replace(tzinfo=timezone.utc)
if end.tzinfo is None:
end = end.replace(tzinfo=timezone.utc)
d["_duration"] = _fmt_dur(int((end - start).total_seconds()))
else:
d["_duration"] = ""
stitches_by_acq.setdefault(s["acquisition_id"], []).append(d)
return [ return [
{ {
"id": acq["id"], "id": acq["id"],
"name": acq["name"], "name": acq["name"],
"source_path": acq["source_path"], "source_path": acq["source_path"],
"jobs": by_acq.get(acq["id"], []), "jobs": by_acq.get(acq["id"], []),
"stitches": stitches_by_acq.get(acq["id"], []),
"total_duration": _fmt_dur(by_acq_total.get(acq["id"], 0)), "total_duration": _fmt_dur(by_acq_total.get(acq["id"], 0)),
} }
for acq in acqs for acq in acqs
@@ -220,3 +260,25 @@ async def retry_job(job_id: int):
(job_id,), (job_id,),
) )
return {"ok": True} return {"ok": True}
@app.post("/stitches/{stitch_id}/cancel")
async def cancel_stitch(stitch_id: int):
with closing(db()) as conn:
conn.execute(
"UPDATE stitches SET status='error', error='cancelled by user', finished_at=datetime('now') "
"WHERE id=? AND status IN ('queued','running')",
(stitch_id,),
)
return {"ok": True}
@app.post("/stitches/{stitch_id}/retry")
async def retry_stitch(stitch_id: int):
with closing(db()) as conn:
conn.execute(
"UPDATE stitches SET status='queued', error=NULL, output_ply=NULL, "
"started_at=NULL, finished_at=NULL, worker_host=NULL WHERE id=? AND status='error'",
(stitch_id,),
)
return {"ok": True}

1
app/static/htmx.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View File

@@ -1,12 +1,3 @@
{% macro duration(job) -%}
{%- if job.started_at and job.finished_at -%}
{{ job._duration }}
{%- elif job.started_at and not job.finished_at -%}
{{ job._duration }}
{%- else -%}&nbsp;
{%- endif -%}
{%- endmacro %}
{% if not acquisitions %} {% if not acquisitions %}
<p class="muted">Aucune acquisition. Ingeste un dossier via <code>scripts/ingest.py</code>.</p> <p class="muted">Aucune acquisition. Ingeste un dossier via <code>scripts/ingest.py</code>.</p>
{% else %} {% else %}
@@ -34,26 +25,56 @@
</span> </span>
<span class="dur">{{ j._duration }}</span> <span class="dur">{{ j._duration }}</span>
{% if j.status in ('queued','extracting','running') %} {% if j.status in ('queued','extracting','running') %}
<button class="mini" hx-post="/jobs/{{ j.id }}/cancel" hx-target="#jobs-table">×</button> <button class="mini" hx-post="jobs/{{ j.id }}/cancel" hx-target="#jobs-table">×</button>
{% elif j.status == 'error' %} {% elif j.status == 'error' %}
<button class="mini" hx-post="/jobs/{{ j.id }}/retry" hx-target="#jobs-table"></button> <button class="mini" hx-post="jobs/{{ j.id }}/retry" hx-target="#jobs-table"></button>
{% else %}
<span></span>
{% endif %} {% endif %}
</li> </li>
{% if j.error %}<li class="err-line">{{ j.error }}</li>{% endif %} {% if j.error %}<li class="err-line">{{ j.error }}</li>{% endif %}
{% endfor %} {% endfor %}
</ul> </ul>
{# Stitch section (placeholder — wired up once multi-job stitching lands) #}
<div class="stitch-section"> <div class="stitch-section">
<div class="stitch-title"> <div class="stitch-title">
<span class="icon"><span class="sq"></span></span> <span class="icon"><span class="sq"></span></span>
<span>stitch</span> <span>stitch</span>
</div> </div>
<ul class="stitch-children"> {% if acq.stitches %}
<li class="sub pending"><span class="sq"></span> pair GP1↔GP2 per AUV</li> <ul class="stitch-children">
<li class="sub pending"><span class="sq"></span> cross-AUV merge</li> {% for s in acq.stitches %}
<li class="sub pending"><span class="sq"></span> final PLY</li> <li class="sub {{ s.status }}">
</ul> <span class="icon stitch-icon">
{% if s.status == 'done' %}<span class="check ok"></span>
{% elif s.status == 'running' %}<span class="spin"></span>
{% elif s.status == 'error' %}<span class="err"></span>
{% else %}<span class="sq"></span>{% endif %}
</span>
<span>
{% if s.level == 'per_auv' %}pair GP1↔GP2 {{ s.auv }}
{% else %}merge final{% endif %}
{% if s._duration %}<span class="dur muted"> — {{ s._duration }}</span>{% endif %}
{% if s.status == 'done' and s.output_ply %}
<span class="ext" title="{{ s.output_ply }}">PLY</span>
{% endif %}
</span>
{% if s.status in ('queued','running') %}
<button class="mini" hx-post="stitches/{{ s.id }}/cancel" hx-target="#jobs-table">×</button>
{% elif s.status == 'error' %}
<button class="mini" hx-post="stitches/{{ s.id }}/retry" hx-target="#jobs-table"></button>
{% endif %}
</li>
{% if s.error %}<li class="err-line" style="padding-left:42px">{{ s.error[:120] }}</li>{% endif %}
{% endfor %}
</ul>
{% else %}
<ul class="stitch-children">
<li class="sub pending"><span class="sq"></span> pair GP1↔GP2 per AUV</li>
<li class="sub pending"><span class="sq"></span> cross-AUV merge</li>
<li class="sub pending"><span class="sq"></span> final PLY</li>
</ul>
{% endif %}
</div> </div>
</div> </div>
{% endfor %} {% endfor %}

View File

@@ -4,8 +4,9 @@
<meta charset="utf-8"> <meta charset="utf-8">
<title>cosma-qc — dashboard</title> <title>cosma-qc — dashboard</title>
<meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="viewport" content="width=device-width, initial-scale=1">
<script src="https://unpkg.com/htmx.org@2.0.4"></script> <base href="/cosma-qc/">
<link rel="stylesheet" href="/static/style.css"> <script src="static/htmx.min.js"></script>
<link rel="stylesheet" href="static/style.css">
</head> </head>
<body> <body>
<header> <header>
@@ -13,13 +14,13 @@
<span class="sub">post-acquisition QC · lingbot-map pipeline</span> <span class="sub">post-acquisition QC · lingbot-map pipeline</span>
</header> </header>
<section id="monitor" hx-get="/partials/monitor" hx-trigger="load, every 5s" hx-swap="innerHTML"> <section id="monitor" hx-get="partials/monitor" hx-trigger="load, every 5s" hx-swap="innerHTML">
<p class="muted">Chargement des workers…</p> <p class="muted">Chargement des workers…</p>
</section> </section>
<section id="jobs"> <section id="jobs">
<h2>Jobs</h2> <h2>Jobs</h2>
<div id="jobs-table" hx-get="/partials/jobs" hx-trigger="load, every 3s" hx-swap="innerHTML"> <div id="jobs-table" hx-get="partials/jobs" hx-trigger="load, every 3s" hx-swap="innerHTML">
<p class="muted">Chargement…</p> <p class="muted">Chargement…</p>
</div> </div>
</section> </section>

View File

@@ -1,9 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Dispatcher daemon: picks queued jobs and runs them on available workers. """Dispatcher daemon: picks queued jobs/stitches and runs them on available workers.
One-shot worker loop. Run as a systemd service (or manually). Handles both
extraction (ffmpeg on the worker) and reconstruction (lingbot-map on the
worker). Progress is written back to the DB.
Env: Env:
COSMA_QC_DB : SQLite path (default /var/lib/cosma-qc/jobs.db) COSMA_QC_DB : SQLite path (default /var/lib/cosma-qc/jobs.db)
@@ -14,8 +10,11 @@ Env:
COSMA_QC_IMG_W : image width (default 518) COSMA_QC_IMG_W : image width (default 518)
Jobs lifecycle: Jobs lifecycle:
queued → extracting → running → done queued → extracting → running → done → [triggers per_auv stitch]
↘ error ↘ error
Stitch lifecycle:
queued → running → done → [triggers cross_auv stitch if all per_auv done]
↘ error
""" """
from __future__ import annotations from __future__ import annotations
@@ -40,6 +39,7 @@ FPS = int(os.environ.get("COSMA_QC_FPS", "3"))
IMG_H = int(os.environ.get("COSMA_QC_IMG_H", "294")) IMG_H = int(os.environ.get("COSMA_QC_IMG_H", "294"))
IMG_W = int(os.environ.get("COSMA_QC_IMG_W", "518")) IMG_W = int(os.environ.get("COSMA_QC_IMG_W", "518"))
POLL_S = int(os.environ.get("COSMA_QC_POLL_S", "4")) POLL_S = int(os.environ.get("COSMA_QC_POLL_S", "4"))
STITCH_SCRIPT = Path(__file__).parent / "stitch.py"
DEFAULT_WORKERS = [ DEFAULT_WORKERS = [
{ {
@@ -93,9 +93,7 @@ def pick_worker(estimated_vram_mib: int) -> dict | None:
def estimate_vram_mib(frame_count: int) -> int: def estimate_vram_mib(frame_count: int) -> int:
# Based on empirical: 300 frames peak ≈ 9.4 GiB, 600 frames OOM @ ~11 GiB. return int(3500 + 13 * frame_count)
# Linear extrapolation with headroom.
return int(3500 + 13 * frame_count) # MiB
def set_status(job_id: int, **fields): def set_status(job_id: int, **fields):
@@ -106,6 +104,14 @@ def set_status(job_id: int, **fields):
conn.execute(q, (*vals, job_id)) conn.execute(q, (*vals, job_id))
def set_stitch_status(stitch_id: int, **fields):
    """Write arbitrary column=value pairs to one row of the stitches table.

    Mirrors set_status() for jobs: column names come from kwargs (trusted,
    internal callers only) and values are bound as SQL parameters.
    """
    assignments = ", ".join(f"{col}=?" for col in fields)
    query = "UPDATE stitches SET " + assignments + " WHERE id=?"
    params = (*fields.values(), stitch_id)
    with closing(db()) as conn:
        conn.execute(query, params)
def count_frames(worker: dict, frames_dir: str) -> int: def count_frames(worker: dict, frames_dir: str) -> int:
rc, out, _ = ssh(worker["ssh_alias"], f"ls {shlex.quote(frames_dir)} 2>/dev/null | wc -l") rc, out, _ = ssh(worker["ssh_alias"], f"ls {shlex.quote(frames_dir)} 2>/dev/null | wc -l")
try: try:
@@ -115,7 +121,6 @@ def count_frames(worker: dict, frames_dir: str) -> int:
def do_extract(job: sqlite3.Row, worker: dict) -> str: def do_extract(job: sqlite3.Row, worker: dict) -> str:
"""Run ffmpeg on the worker for each video in job.video_paths."""
videos = json.loads(job["video_paths"]) videos = json.loads(job["video_paths"])
frames_dir = f"{worker['frames_dir']}/job_{job['id']}" frames_dir = f"{worker['frames_dir']}/job_{job['id']}"
ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)}") ssh(worker["ssh_alias"], f"mkdir -p {shlex.quote(frames_dir)}")
@@ -123,7 +128,6 @@ def do_extract(job: sqlite3.Row, worker: dict) -> str:
for v in videos: for v in videos:
vf = f"fps={FPS},scale={IMG_W}:{IMG_H}" vf = f"fps={FPS},scale={IMG_W}:{IMG_H}"
pattern = f"{frames_dir}/frame_%06d.jpg" pattern = f"{frames_dir}/frame_%06d.jpg"
# Prepend to idx to keep frame ordering across videos.
cmd = ( cmd = (
f"ffmpeg -hide_banner -loglevel error -i {shlex.quote(v)} " f"ffmpeg -hide_banner -loglevel error -i {shlex.quote(v)} "
f"-vf {shlex.quote(vf)} -start_number {idx} -q:v 4 " f"-vf {shlex.quote(vf)} -start_number {idx} -q:v 4 "
@@ -132,21 +136,22 @@ def do_extract(job: sqlite3.Row, worker: dict) -> str:
rc, _, err = ssh(worker["ssh_alias"], cmd, timeout=3600) rc, _, err = ssh(worker["ssh_alias"], cmd, timeout=3600)
if rc != 0: if rc != 0:
raise RuntimeError(f"ffmpeg failed on {v}: {err[:200]}") raise RuntimeError(f"ffmpeg failed on {v}: {err[:200]}")
# Count frames now present to bump idx
idx = count_frames(worker, frames_dir) idx = count_frames(worker, frames_dir)
set_status(job["id"], frame_count=idx) set_status(job["id"], frame_count=idx)
return frames_dir return frames_dir
def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str, str]: def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str, str, str]:
port = worker["viser_port_base"] + job["id"] port = worker["viser_port_base"] + job["id"]
log = f"/tmp/cosma-qc-job-{job['id']}.log" log = f"/tmp/cosma-qc-job-{job['id']}.log"
ckpt = f"{worker['lingbot_path']}/checkpoints/lingbot-map/lingbot-map-long.pt" ckpt = f"{worker['lingbot_path']}/checkpoints/lingbot-map/lingbot-map-long.pt"
ply_path = f"{frames_dir}/reconstruction.ply"
cmd = ( cmd = (
f"cd {shlex.quote(worker['lingbot_path'])} && source .venv/bin/activate && " f"cd {shlex.quote(worker['lingbot_path'])} && source .venv/bin/activate && "
f"python3 demo.py --model_path {shlex.quote(ckpt)} " f"python3 demo.py --model_path {shlex.quote(ckpt)} "
f"--image_folder {shlex.quote(frames_dir)} --port {port} " f"--image_folder {shlex.quote(frames_dir)} --port {port} "
f"--use_sdpa --mode windowed --window_size 16 --overlap_size 2 --offload_to_cpu " f"--use_sdpa --mode windowed --window_size 16 --overlap_size 2 --offload_to_cpu "
f"--save_ply {shlex.quote(ply_path)} "
f"> {log} 2>&1" f"> {log} 2>&1"
) )
rc, _, err = ssh(worker["ssh_alias"], cmd, timeout=3 * 3600) rc, _, err = ssh(worker["ssh_alias"], cmd, timeout=3 * 3600)
@@ -154,7 +159,138 @@ def do_reconstruct(job: sqlite3.Row, worker: dict, frames_dir: str) -> tuple[str
tail = ssh(worker["ssh_alias"], f"tail -30 {log}")[1] tail = ssh(worker["ssh_alias"], f"tail -30 {log}")[1]
raise RuntimeError(f"demo.py failed: {err[:200]}\n---\n{tail[:800]}") raise RuntimeError(f"demo.py failed: {err[:200]}\n---\n{tail[:800]}")
viser_url = f"http://{worker['host']}:{port}" viser_url = f"http://{worker['host']}:{port}"
return viser_url, log return viser_url, log, ply_path
def _maybe_create_per_auv_stitch(job_id: int):
    """Enqueue the per-AUV stitch once every job of this job's AUV is done.

    Idempotent: returns without side effects when jobs are still pending or a
    per_auv stitch already exists for this (acquisition, AUV) pair.
    """
    with closing(db()) as conn:
        job = conn.execute("SELECT * FROM jobs WHERE id=?", (job_id,)).fetchone()
        if job is None:
            return
        acq_id = job["acquisition_id"]
        auv = job["auv"]
        total = conn.execute(
            "SELECT COUNT(*) FROM jobs WHERE acquisition_id=? AND auv=?", (acq_id, auv)
        ).fetchone()[0]
        done = conn.execute(
            "SELECT COUNT(*) FROM jobs WHERE acquisition_id=? AND auv=? AND status='done'", (acq_id, auv)
        ).fetchone()[0]
        if total == 0 or done < total:
            return  # some segment of this AUV has not completed yet
        already = conn.execute(
            "SELECT id FROM stitches WHERE acquisition_id=? AND level='per_auv' AND auv=?", (acq_id, auv)
        ).fetchone()
        if already is not None:
            return  # an earlier completion already created the stitch
        rows = conn.execute(
            "SELECT id FROM jobs WHERE acquisition_id=? AND auv=?", (acq_id, auv)
        ).fetchall()
        job_ids = [row["id"] for row in rows]
        conn.execute(
            "INSERT INTO stitches (acquisition_id, level, auv, input_job_ids) VALUES (?,?,?,?)",
            (acq_id, "per_auv", auv, json.dumps(job_ids)),
        )
        print(f" → Stitch per_auv créé pour {auv} acq#{acq_id}")
def _maybe_create_cross_auv_stitch(stitch_id: int):
    """Enqueue the final cross-AUV merge once every per-AUV stitch is done.

    Called after a per_auv stitch completes. Idempotent: at most one cross_auv
    stitch is ever created per acquisition.
    """
    with closing(db()) as conn:
        st = conn.execute("SELECT * FROM stitches WHERE id=?", (stitch_id,)).fetchone()
        if not st:
            return
        acq_id = st["acquisition_id"]
        # Distinct AUV count comes from the jobs table (the source of truth),
        # not from stitches, so missing per_auv stitches are detected below.
        n_auvs = conn.execute(
            "SELECT COUNT(DISTINCT auv) FROM jobs WHERE acquisition_id=?", (acq_id,)
        ).fetchone()[0]
        if n_auvs < 2:
            return  # single-AUV acquisition: nothing to merge
        total_per_auv = conn.execute(
            "SELECT COUNT(*) FROM stitches WHERE acquisition_id=? AND level='per_auv'", (acq_id,)
        ).fetchone()[0]
        done_per_auv = conn.execute(
            "SELECT COUNT(*) FROM stitches WHERE acquisition_id=? AND level='per_auv' AND status='done'", (acq_id,)
        ).fetchone()[0]
        # done_per_auv is compared to n_auvs (one done stitch per AUV expected),
        # which is stricter than comparing to total_per_auv while some per_auv
        # stitches have not even been created yet.
        if total_per_auv == 0 or done_per_auv < n_auvs:
            return
        existing = conn.execute(
            "SELECT id FROM stitches WHERE acquisition_id=? AND level='cross_auv'", (acq_id,)
        ).fetchone()
        if existing:
            return  # idempotence guard: merge already queued/ran
        stitch_ids = [r["id"] for r in conn.execute(
            "SELECT id FROM stitches WHERE acquisition_id=? AND level='per_auv'", (acq_id,)
        ).fetchall()]
        # cross_auv consumes stitch outputs, so input_job_ids stays empty.
        conn.execute(
            "INSERT INTO stitches (acquisition_id, level, input_stitch_ids, input_job_ids) VALUES (?,?,?,?)",
            (acq_id, "cross_auv", json.dumps(stitch_ids), "[]")
        )
        print(f" → Stitch cross_auv créé pour acq#{acq_id}")
def deploy_stitch_script(worker: dict):
    """Copy the local stitch script to /tmp/cosma-stitch.py on the worker.

    Best-effort by design: a failed copy must not crash the dispatcher loop
    (this is called outside the try block in run_one_stitch). Previously the
    scp result was silently discarded — a failure only surfaced later as a
    cryptic "can't open file" stitch error — and a TimeoutExpired would have
    propagated and killed main(). Now both cases are logged and swallowed.
    """
    try:
        result = subprocess.run(
            ["scp", str(STITCH_SCRIPT), f"{worker['ssh_alias']}:/tmp/cosma-stitch.py"],
            capture_output=True, timeout=30
        )
    except subprocess.TimeoutExpired:
        print(f" ! scp stitch script → {worker['ssh_alias']}: timeout")
        return
    if result.returncode != 0:
        stderr = result.stderr.decode(errors="replace").strip()
        print(f" ! scp stitch script → {worker['ssh_alias']} failed: {stderr[:200]}")
def run_one_stitch(stitch: sqlite3.Row):
    """Execute one stitch end to end on a remote worker, recording status in DB.

    Flow: gather input PLY paths (job outputs for per_auv, per_auv stitch
    outputs for cross_auv) → deploy the stitch script → run it over SSH →
    write done/error back, cascading a cross_auv stitch on success.
    """
    stitch_id = stitch["id"]
    # Stitching needs little VRAM; if no worker is free, fall back to the
    # first one rather than waiting (unlike reconstruction jobs).
    worker = pick_worker(2000)
    if not worker:
        worker = WORKERS[0]
    with closing(db()) as conn:
        if stitch["level"] == "per_auv":
            job_ids = json.loads(stitch["input_job_ids"] or "[]")
            if job_ids:
                # Placeholder list is sized from trusted internal ids only.
                rows = conn.execute(
                    f"SELECT ply_path FROM jobs WHERE id IN ({','.join('?'*len(job_ids))})",
                    job_ids
                ).fetchall()
            else:
                rows = []
            ply_paths = [r["ply_path"] for r in rows if r["ply_path"]]
        else:
            stitch_ids = json.loads(stitch["input_stitch_ids"] or "[]")
            if stitch_ids:
                rows = conn.execute(
                    f"SELECT output_ply FROM stitches WHERE id IN ({','.join('?'*len(stitch_ids))})",
                    stitch_ids
                ).fetchall()
            else:
                rows = []
            ply_paths = [r["output_ply"] for r in rows if r["output_ply"]]
    if len(ply_paths) < 2:
        # Nothing to merge: fail explicitly instead of producing a bogus PLY.
        set_stitch_status(stitch_id, status="error",
                          error=f"Pas assez de PLY disponibles ({len(ply_paths)})",
                          finished_at=_now_iso())
        return
    out_ply = f"{worker['frames_dir']}/stitch_{stitch_id}.ply"
    deploy_stitch_script(worker)
    # Remote command: activate the lingbot venv, run the stitch script with
    # output path first then all input PLYs, log to a per-stitch file.
    cmd = (
        f"source {shlex.quote(worker['lingbot_path'])}/.venv/bin/activate && "
        f"python3 /tmp/cosma-stitch.py {shlex.quote(out_ply)} "
        + " ".join(shlex.quote(p) for p in ply_paths)
        + f" > /tmp/cosma-stitch-{stitch_id}.log 2>&1"
    )
    set_stitch_status(stitch_id, status="running", worker_host=worker["host"], started_at=_now_iso())
    try:
        rc, _, err = ssh(worker["ssh_alias"], cmd, timeout=4 * 3600)
    except Exception as e:
        # SSH-level failure (timeout, connection drop): record and bail.
        set_stitch_status(stitch_id, status="error", error=str(e)[:500], finished_at=_now_iso())
        return
    if rc == 0:
        set_stitch_status(stitch_id, status="done", output_ply=out_ply, finished_at=_now_iso())
        # A finished per_auv stitch may complete the set → cascade the merge.
        _maybe_create_cross_auv_stitch(stitch_id)
    else:
        # Pull the tail of the remote log so the UI error is actionable.
        tail = ssh(worker["ssh_alias"], f"tail -20 /tmp/cosma-stitch-{stitch_id}.log")[1]
        set_stitch_status(stitch_id, status="error",
                          error=f"{err[:200]}\n{tail[:600]}",
                          finished_at=_now_iso())
def run_one(job: sqlite3.Row): def run_one(job: sqlite3.Row):
@@ -162,21 +298,19 @@ def run_one(job: sqlite3.Row):
estimated = estimate_vram_mib(job["frame_count"] or 400) estimated = estimate_vram_mib(job["frame_count"] or 400)
worker = pick_worker(estimated) worker = pick_worker(estimated)
if not worker: if not worker:
return # retry later return
set_status(job_id, status="extracting", worker_host=worker["host"], set_status(job_id, status="extracting", worker_host=worker["host"], started_at=_now_iso())
started_at=_now_iso())
try: try:
frames_dir = do_extract(job, worker) frames_dir = do_extract(job, worker)
frame_count = count_frames(worker, frames_dir) frame_count = count_frames(worker, frames_dir)
set_status(job_id, frames_dir=frames_dir, frame_count=frame_count, set_status(job_id, frames_dir=frames_dir, frame_count=frame_count,
status="running", progress=0) status="running", progress=0)
viser_url, log = do_reconstruct(job, worker, frames_dir) viser_url, log, ply_path = do_reconstruct(job, worker, frames_dir)
set_status(job_id, status="done", viser_url=viser_url, progress=100, set_status(job_id, status="done", viser_url=viser_url, ply_path=ply_path,
log_tail=log, progress=100, log_tail=log, finished_at=_now_iso())
finished_at=_now_iso()) _maybe_create_per_auv_stitch(job_id)
except Exception as e: except Exception as e:
set_status(job_id, status="error", error=str(e)[:2000], set_status(job_id, status="error", error=str(e)[:2000], finished_at=_now_iso())
finished_at=_now_iso())
def pop_queued() -> sqlite3.Row | None: def pop_queued() -> sqlite3.Row | None:
@@ -186,14 +320,28 @@ def pop_queued() -> sqlite3.Row | None:
).fetchone() ).fetchone()
def pop_queued_stitch() -> sqlite3.Row | None:
    """Return the oldest queued stitch, or None when the queue is empty."""
    query = "SELECT * FROM stitches WHERE status='queued' ORDER BY created_at LIMIT 1"
    with closing(db()) as conn:
        row = conn.execute(query).fetchone()
    return row
def main(): def main():
print(f"cosma-qc dispatcher · DB={DB_PATH} · workers={[w['host'] for w in WORKERS]}") print(f"cosma-qc dispatcher · DB={DB_PATH} · workers={[w['host'] for w in WORKERS]}")
while True: while True:
job = pop_queued() job = pop_queued()
if job is None: if job:
time.sleep(POLL_S); continue print(f"→ job #{job['id']} ({job['auv']}/{job['gopro_serial']}/{job['segment_label']})")
print(f"→ picking up job #{job['id']} ({job['auv']}/{job['gopro_serial']}/{job['segment_label']})") run_one(job)
run_one(job) continue
stitch = pop_queued_stitch()
if stitch:
label = f"{stitch['level']} {stitch['auv'] or ''} acq#{stitch['acquisition_id']}"
print(f"→ stitch #{stitch['id']} ({label})")
run_one_stitch(stitch)
continue
time.sleep(POLL_S)
if __name__ == "__main__": if __name__ == "__main__":