feat: closes #2,#3,#4 — scripts pre_decimate + archive_job + check_jobs + tests

This commit is contained in:
Floppyrj45
2026-04-25 18:23:18 +02:00
parent 74edb36471
commit 48e9d42260
6 changed files with 267 additions and 0 deletions

0
scripts/__init__.py Normal file
View File

29
scripts/archive_job.sh Normal file
View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
# archive_job.sh <job_id> [frames_base] [nas_base]
#
# Rsyncs one job's artifacts (frame_*.jpg, *.ply, *.npz, *.log) from
# <frames_base>/job_<job_id> to <nas_base>/job_<job_id> on the NAS (.156).
#
# Defaults:
#   frames_base  /root/cosma-qc-frames
#   nas_base     /mnt/nas-cosma/cosma-archive
#
# Exit status: 1 when the job directory does not exist. Because of
# `pipefail`, an rsync failure also aborts the script even though its
# output is piped through `tail`.
set -euo pipefail

JOB_ID="${1:?Usage: archive_job.sh <job_id> [frames_base] [nas_base]}"
FRAMES_BASE="${2:-/root/cosma-qc-frames}"
NAS_BASE="${3:-/mnt/nas-cosma/cosma-archive}"
SRC="${FRAMES_BASE}/job_${JOB_ID}"
DST="${NAS_BASE}/job_${JOB_ID}"

if [ ! -d "${SRC}" ]; then
    # Diagnostics go to stderr so callers capturing stdout only see progress.
    echo "Job dir not found: ${SRC}" >&2
    exit 1
fi

mkdir -p "${DST}"
echo "[$(date)] Archivage job_${JOB_ID} vers NAS..."

# Options are listed before the operands so the call does not depend on
# GNU-style option permutation.
# NOTE: --exclude="*" also matches sub-directories, so rsync does not descend
# into them; only files located directly inside SRC are archived.
rsync -av --progress \
    --include="frame_*.jpg" \
    --include="*.ply" \
    --include="*.npz" \
    --include="*.log" \
    --exclude="*" \
    "${SRC}/" "${DST}/" \
    2>&1 | tail -5

echo "[$(date)] Archive job_${JOB_ID} done: ${DST}"

69
scripts/check_jobs.py Normal file
View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""Check integrity of processed jobs (PLY + poses present)."""
import argparse
import glob
import json
import os
from pathlib import Path
from typing import Any
# File names that must exist inside a job directory for check_job() to
# report the job as "ok"; any absent name is listed under "missing".
REQUIRED_FILES = ["reconstruction.ply", "lingbot_poses.npz"]
# Optional artifact names. NOTE(review): not referenced by the code visible
# in this file; check_job() tests "model_decimated.ply" by its literal name.
OPTIONAL_FILES = ["model_decimated.ply"]
def check_job(job_id: int, frames_base: str = "/root/cosma-qc-frames") -> dict[str, Any]:
    """Report whether a processed job directory holds its expected artifacts.

    Args:
        job_id: Numeric job identifier; the directory inspected is
            ``<frames_base>/job_<job_id>``.
        frames_base: Directory that contains the ``job_*`` directories.

    Returns:
        A dict with the keys:

        * ``job_id`` - the id that was passed in.
        * ``status`` - ``"missing"`` when the job directory does not exist,
          ``"ok"`` when every name in ``REQUIRED_FILES`` is present,
          otherwise ``"incomplete"``.
        * ``missing`` - the required file names that are absent.
        * ``details`` - ``ply_size_gb`` (size of ``reconstruction.ply`` in
          units of 1e9 bytes, rounded to 2 decimals), ``n_poses`` or
          ``poses_error`` for ``lingbot_poses.npz``, and ``decimated``
          (whether ``model_decimated.ply`` exists).
    """
    job_dir = Path(frames_base) / f"job_{job_id}"
    if not job_dir.exists():
        return {"job_id": job_id, "status": "missing", "missing": [], "details": {}}

    missing = [name for name in REQUIRED_FILES if not (job_dir / name).exists()]
    details: dict[str, Any] = {}

    ply = job_dir / "reconstruction.ply"
    if ply.exists():
        details["ply_size_gb"] = round(ply.stat().st_size / 1e9, 2)

    poses = job_dir / "lingbot_poses.npz"
    if poses.exists():
        # Best-effort inspection: any failure (numpy unavailable, corrupt or
        # empty archive, ...) is reported in the result instead of raised.
        try:
            import numpy as np

            # np.load keeps the .npz archive open until it is closed; the
            # context manager releases the file handle deterministically.
            with np.load(str(poses)) as archive:
                n = archive["poses"].shape[0] if "poses" in archive else 0
            details["n_poses"] = n
        except Exception as e:
            details["poses_error"] = str(e)

    details["decimated"] = (job_dir / "model_decimated.ply").exists()

    return {
        "job_id": job_id,
        "status": "ok" if not missing else "incomplete",
        "missing": missing,
        "details": details,
    }
def main() -> None:
    """CLI entry point: print a JSON integrity report for the selected jobs.

    Positional ``job_ids`` select individual jobs. With ``--all`` the
    positional ids are ignored and every directory named ``job_<number>``
    directly under ``--frames-base`` is checked, in ascending id order.
    The report is a JSON list (indent 2) of ``check_job`` results written
    to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("job_ids", nargs="*", type=int)
    parser.add_argument("--frames-base", default="/root/cosma-qc-frames")
    parser.add_argument("--all", action="store_true", help="Check all job dirs")
    args = parser.parse_args()

    base = Path(args.frames_base)
    if args.all:
        if not base.is_dir():
            # Give a usage-style error instead of a raw traceback from iterdir().
            parser.error(f"frames base is not a directory: {base}")
        # isdecimal() (unlike isdigit()) only accepts characters that int()
        # can parse, so names such as "job_²" are skipped instead of crashing.
        ids = sorted(
            int(entry.name[4:])
            for entry in base.iterdir()
            if entry.is_dir()
            and entry.name.startswith("job_")
            and entry.name[4:].isdecimal()
        )
    else:
        ids = args.job_ids

    results = [check_job(jid, args.frames_base) for jid in ids]
    print(json.dumps(results, indent=2))


if __name__ == "__main__":
    main()

62
scripts/pre_decimate.py Normal file
View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
"""Decimate PLY and SCP to cosma-vm after a job completes."""
import argparse
import subprocess
import sys
from pathlib import Path
# Default SSH/SCP target (user@host) used by main() for the upload.
COSMA_VM = "cosma@192.168.0.83"
# Default remote base directory; main() uploads to <COSMA_DATA>/<job_id>.
COSMA_DATA = "/data/cosma"
# Default point budget: decimate_ply() down-samples clouds larger than this.
MAX_PTS = 300_000
def find_ply(frames_dir: Path) -> Path | None:
for candidate in ["model.ply", "output.ply", "reconstruction.ply"]:
p = frames_dir / candidate
if p.exists():
return p
plys = list(frames_dir.glob("*.ply"))
return plys[0] if plys else None
def decimate_ply(src: str, dst: str, max_pts: int = MAX_PTS) -> None:
    """Write a voxel-down-sampled copy of the point cloud *src* to *dst*.

    Clouds with at most *max_pts* points are written unchanged. Larger
    clouds are down-sampled with a voxel size derived from the bounding-box
    volume divided by *max_pts* (cube root), never smaller than 0.02, so the
    resulting point count is approximate rather than exactly *max_pts*.

    Args:
        src: Path of the input point-cloud file.
        dst: Path of the output file.
        max_pts: Point budget above which down-sampling is applied.

    Raises:
        ValueError: If *max_pts* is not positive.
        FileNotFoundError: If *src* does not exist.
        OSError: If Open3D reports that writing *dst* failed.
    """
    if max_pts <= 0:
        # A zero/negative budget would otherwise surface as ZeroDivisionError
        # or as a complex-number TypeError from the cube root below.
        raise ValueError(f"max_pts must be positive, got {max_pts}")

    src_path = Path(src)
    if not src_path.exists():
        raise FileNotFoundError(src)

    # Heavy third-party imports are deferred until the cheap checks passed.
    import numpy as np
    import open3d as o3d

    pcd = o3d.io.read_point_cloud(str(src_path))
    n = len(pcd.points)
    if n > max_pts:
        extent = pcd.get_max_bound() - pcd.get_min_bound()
        vol = float(np.prod(extent))
        vox = max((vol / max_pts) ** (1 / 3), 0.02)
        pcd = pcd.voxel_down_sample(vox)

    # write_point_cloud signals failure through its boolean result, not an
    # exception; surface it so callers never upload a missing/stale file.
    if not o3d.io.write_point_cloud(dst, pcd):
        raise OSError(f"Failed to write point cloud to {dst}")
    print(f"Decimated {n} -> {len(pcd.points)} pts -> {dst}", flush=True)
def main() -> None:
    """CLI entry point: decimate a job's PLY and upload it over SSH/SCP.

    Steps:
        1. Locate the source PLY in ``--frames-dir`` with ``find_ply``.
        2. Write ``model_decimated.ply`` next to it with ``decimate_ply``.
        3. Create ``<cosma-data>/<job_id>`` on ``--cosma-vm`` via ``ssh``.
        4. Copy the decimated file there via ``scp``.

    When no PLY is found a message is printed and the process exits with
    status 0. ``ssh``/``scp`` failures raise ``subprocess.CalledProcessError``
    because both commands run with ``check=True``.
    """
    import shlex  # only needed here; keeps the file's import block untouched

    parser = argparse.ArgumentParser()
    parser.add_argument("job_id", type=int)
    parser.add_argument("--frames-dir", required=True)
    parser.add_argument("--cosma-vm", default=COSMA_VM)
    parser.add_argument("--cosma-data", default=COSMA_DATA)
    args = parser.parse_args()

    frames_dir = Path(args.frames_dir)
    ply_src = find_ply(frames_dir)
    if ply_src is None:
        print(f"No PLY found in {frames_dir}", flush=True)
        sys.exit(0)

    ply_dec = frames_dir / "model_decimated.ply"
    decimate_ply(str(ply_src), str(ply_dec))

    remote_dir = f"{args.cosma_data}/{args.job_id}"
    remote_file = f"{remote_dir}/model_decimated.ply"
    # The ssh command string is interpreted by the remote shell, so the path
    # is quoted to survive spaces/metacharacters; "--" guards a leading dash.
    # NOTE: quoting disables "~" expansion; pass an absolute --cosma-data.
    subprocess.run(
        ["ssh", args.cosma_vm, f"mkdir -p -- {shlex.quote(remote_dir)}"],
        check=True,
    )
    subprocess.run(["scp", str(ply_dec), f"{args.cosma_vm}:{remote_file}"], check=True)
    print(f"SCP done -> {args.cosma_vm}:{remote_file}", flush=True)


if __name__ == "__main__":
    main()

54
tests/test_check_jobs.py Normal file
View File

@@ -0,0 +1,54 @@
import tempfile
from pathlib import Path
def _make_job_dir(base: Path, job_id: int, has_ply: bool = True, has_poses: bool = True) -> Path:
job_dir = base / f"job_{job_id}"
job_dir.mkdir(parents=True)
if has_ply:
(job_dir / "reconstruction.ply").write_bytes(b"\x00" * 100)
if has_poses:
(job_dir / "lingbot_poses.npz").touch()
return job_dir
def test_complete_job_is_ok():
    """A job directory holding both required files is reported as ``ok``."""
    from scripts.check_jobs import check_job

    with tempfile.TemporaryDirectory() as workdir:
        _make_job_dir(Path(workdir), 1)
        report = check_job(1, workdir)
        assert report["status"] == "ok"
        assert report["job_id"] == 1
        assert report["missing"] == []
def test_missing_ply_flagged():
    """Without ``reconstruction.ply`` the job is ``incomplete`` and lists it."""
    from scripts.check_jobs import check_job

    with tempfile.TemporaryDirectory() as workdir:
        _make_job_dir(Path(workdir), 2, has_ply=False)
        report = check_job(2, workdir)
        assert report["status"] == "incomplete"
        assert "reconstruction.ply" in report["missing"]
def test_missing_poses_flagged():
    """Without ``lingbot_poses.npz`` the job is ``incomplete`` and lists it."""
    from scripts.check_jobs import check_job

    with tempfile.TemporaryDirectory() as workdir:
        _make_job_dir(Path(workdir), 3, has_poses=False)
        report = check_job(3, workdir)
        assert report["status"] == "incomplete"
        assert "lingbot_poses.npz" in report["missing"]
def test_missing_job_dir_returns_missing():
    """A job whose directory does not exist is reported with status ``missing``."""
    from scripts.check_jobs import check_job

    report = check_job(999, "/nonexistent/base")
    assert report["status"] == "missing"
def test_decimated_flag_false_without_file():
    """``details["decimated"]`` is False when no decimated model was written."""
    from scripts.check_jobs import check_job

    with tempfile.TemporaryDirectory() as workdir:
        _make_job_dir(Path(workdir), 4)
        report = check_job(4, workdir)
        assert report["details"]["decimated"] is False

View File

@@ -0,0 +1,53 @@
import numpy as np
import tempfile
from pathlib import Path
import pytest
def _make_tiny_ply(path: Path) -> None:
    """Write a point cloud of 1000 uniformly random points to *path*."""
    import open3d as o3d

    coords = np.random.rand(1000, 3).astype(np.float64)
    cloud = o3d.geometry.PointCloud()
    cloud.points = o3d.utility.Vector3dVector(coords)
    o3d.io.write_point_cloud(str(path), cloud)
def test_decimate_reduces_points():
    """A 1000-point cloud decimated with ``max_pts=100`` shrinks markedly."""
    from scripts.pre_decimate import decimate_ply

    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        source = root / "model.ply"
        target = root / "model_decimated.ply"
        _make_tiny_ply(source)
        decimate_ply(str(source), str(target), max_pts=100)

        import open3d as o3d

        remaining = len(o3d.io.read_point_cloud(str(target)).points)
        # Voxel down-sampling only approximates the budget, so just require
        # clearly fewer points than the 1000 in the source cloud.
        assert remaining < 500
def test_decimate_small_cloud_unchanged():
    """A cloud already under the point budget keeps all of its 1000 points."""
    from scripts.pre_decimate import decimate_ply

    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        source = root / "small.ply"
        target = root / "small_decimated.ply"
        _make_tiny_ply(source)
        decimate_ply(str(source), str(target), max_pts=5000)

        import open3d as o3d

        written = o3d.io.read_point_cloud(str(target))
        assert len(written.points) == 1000
def test_decimate_missing_src_raises():
    """A source path that does not exist raises ``FileNotFoundError``."""
    from scripts.pre_decimate import decimate_ply

    with tempfile.TemporaryDirectory() as workdir:
        destination = str(Path(workdir) / "out.ply")
        with pytest.raises(FileNotFoundError):
            decimate_ply("/nonexistent.ply", destination)
def test_find_ply_candidates():
    """``find_ply`` gives None for an empty directory, then finds ``model.ply``."""
    from scripts.pre_decimate import find_ply

    with tempfile.TemporaryDirectory() as workdir:
        folder = Path(workdir)
        assert find_ply(folder) is None

        expected = folder / "model.ply"
        expected.touch()
        assert find_ply(folder) == expected