feat: demo.py --save_ply + --save_poses + --save_poses_fps — export poses et PLY

2026-04-24 10:21:39 +02:00
parent 59042da3e5
commit 65b73866b1
1 changed files with 63 additions and 0 deletions
--- a/demo.py
+++ b/demo.py
@@ -256,6 +256,54 @@ def postprocess(predictions, images):
    return predictions, images_cpu


+def _save_ply(predictions: dict, out_path: str, conf_threshold: float = 1.5) -> None:
+    """Save world_points above conf_threshold as PLY file."""
+    import open3d as o3d
+    pts  = predictions.get("world_points")
+    conf = predictions.get("world_points_conf")
+    if pts is None:
+        print("WARNING: no world_points in predictions — PLY not saved")
+        return
+    if hasattr(pts, "numpy"):
+        pts = pts.numpy()
+    pts_flat = pts.reshape(-1, 3)
+    if conf is not None:
+        if hasattr(conf, "numpy"):
+            conf = conf.numpy()
+        mask = conf.reshape(-1) > conf_threshold
+        pts_flat = pts_flat[mask]
+    os.makedirs(os.path.dirname(os.path.abspath(out_path)) or ".", exist_ok=True)
+    pcd = o3d.geometry.PointCloud()
+    pcd.points = o3d.utility.Vector3dVector(pts_flat.astype(np.float64))
+    o3d.io.write_point_cloud(out_path, pcd)
+    print(f"PLY saved: {out_path}  ({len(pcd.points):,} pts)")
+
+
+def _save_poses(predictions: dict, n_frames: int, source_fps: float,
+                out_path: str, save_fps: float = 2.0) -> None:
+    """Save camera extrinsics subsampled to save_fps as NPZ.
+
+    Output NPZ keys:
+      poses         (M, 3, 4) float32  c2w extrinsic matrices
+      timestamps_ns (M,)      int64    relative timestamps (ns from frame 0)
+      frame_ids     (M,)      int64    original frame indices
+    """
+    ext = predictions.get("extrinsic")
+    if ext is None:
+        print("WARNING: no extrinsic in predictions — poses not saved")
+        return
+    if hasattr(ext, "numpy"):
+        ext = ext.numpy()
+    N = ext.shape[0]
+    step = max(1, round(source_fps / save_fps)) if save_fps > 0 else 1
+    idxs = np.arange(0, N, step, dtype=np.int64)
+    poses_sub = ext[idxs].astype(np.float32)
+    t_ns = (idxs * (1e9 / source_fps)).astype(np.int64)
+    os.makedirs(os.path.dirname(os.path.abspath(out_path)) or ".", exist_ok=True)
+    np.savez(out_path, poses=poses_sub, timestamps_ns=t_ns, frame_ids=idxs)
+    print(f"Poses saved: {out_path}  ({len(idxs)} poses @ {save_fps} fps)")
+
+
 def prepare_for_visualization(predictions, images=None):
    """Convert predictions to the unbatched NumPy format used by vis code."""
    vis_predictions = {}
@@ -354,6 +402,14 @@ def main():
    parser.add_argument("--export_preprocessed", type=str, default=None,
                        help="Export stride-sampled, resized/cropped images to this folder")

+    # Output export
+    parser.add_argument("--save_ply", type=str, default=None,
+                        help="Save point cloud to this PLY file path")
+    parser.add_argument("--save_poses", type=str, default=None,
+                        help="Save camera extrinsics to this NPZ file path")
+    parser.add_argument("--save_poses_fps", type=float, default=2.0,
+                        help="Subsampling FPS for saved poses (default 2.0)")
+
    args = parser.parse_args()
    assert args.image_folder or args.video_path, \
        "Provide --image_folder or --video_path"
@@ -497,6 +553,13 @@ def main():

    predictions, images_cpu = postprocess(predictions, images_for_post)

+    # ── Export ───────────────────────────────────────────────────────────────
+    source_fps = args.fps if args.video_path else 1.0
+    if args.save_ply:
+        _save_ply(predictions, args.save_ply, args.conf_threshold)
+    if args.save_poses:
+        _save_poses(predictions, len(paths), source_fps, args.save_poses, args.save_poses_fps)
+
    # ── Visualize ────────────────────────────────────────────────────────────
    try:
        from lingbot_map.vis import PointCloudViewer