stage03b: trim videos per run + ours rough cut
LRV proxy (GoPro low-res 768x432 H.264) + ffmpeg -c copy keyframe-aligned. Inputs: 02_runs.json + 03_video_index.json. Outputs: per-run mp4 + ours_<gp>.mp4 chrono concat. Tested on 20260505-Lepradet: 5 files + 2 ours (~11 GB total). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
228
pipeline/stages/03b_trim_runs.py
Executable file
228
pipeline/stages/03b_trim_runs.py
Executable file
@@ -0,0 +1,228 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Stage 03b - Trim videos per run (LRV proxies + -c copy, fast).
|
||||||
|
|
||||||
|
Inputs:
|
||||||
|
data/<MISSION>/02_runs.json
|
||||||
|
data/<MISSION>/03_video_index.json
|
||||||
|
|
||||||
|
Strategy:
|
||||||
|
- Use GoPro LRV proxy files (768x432 H.264 ~720 kbps) instead of 4K HEVC originals.
|
||||||
|
- ffmpeg -c copy per chapter (keyframe-aligned cut) + concat demuxer.
|
||||||
|
- Output: per-run .mp4 + ours_<gp>.mp4 (concat of per-run).
|
||||||
|
|
||||||
|
Falls back to MP4 source if matching LRV is missing.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Root directory for this stage's video outputs; main() writes each mission's
# files under OUT_ROOT / <mission>.
OUT_ROOT = Path("/mnt/ssd/cosma-qc-out/03b_trim_runs")
|
||||||
|
|
||||||
|
|
||||||
|
def run_ff(cmd: list[str]) -> None:
    """Execute an ffmpeg command line and raise if it exits non-zero.

    The child's stdout/stderr are captured rather than streamed. On failure
    the full command and the last 3000 characters of its stderr are echoed
    to this process's stderr before the error is raised.

    Raises:
        RuntimeError: the command returned a non-zero exit status.
    """
    proc = subprocess.run(cmd, capture_output=True, text=True)
    if proc.returncode == 0:
        return

    # Only the tail of stderr is shown: ffmpeg puts the actual error last.
    sys.stderr.write(" ".join(cmd) + "\n")
    sys.stderr.write(proc.stderr[-3000:] + "\n")
    raise RuntimeError(f"ffmpeg failed rc={proc.returncode}")
|
||||||
|
|
||||||
|
|
||||||
|
def lrv_for_chapter(mp4_path: Path) -> Path | None:
    """Locate the GoPro low-res proxy (.LRV) stored next to a chapter file.

    Only file names that start with ``GX`` and end in ``.MP4`` (extension
    compared case-insensitively) are considered. The proxy name is built by
    replacing the ``GX`` prefix with ``GL`` and the extension with ``.LRV``,
    in the same directory.

    Returns:
        The proxy path if that file exists, otherwise ``None``.
    """
    filename = mp4_path.name
    is_gx_chapter = filename.startswith("GX") and filename.upper().endswith(".MP4")
    if not is_gx_chapter:
        return None

    # Strip the 2-char prefix and the 4-char ".MP4" suffix, keep the middle.
    stem_tail = filename[2:-4]
    candidate = mp4_path.parent / ("GL" + stem_tail + ".LRV")
    if candidate.exists():
        return candidate
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def overlap_clips(run_start: float, run_end: float, chapters: list[dict]) -> list[tuple[dict, float, float]]:
    """Compute which part of each chapter falls inside a run's time window.

    Chapters are visited in ascending ``start_epoch`` order. A chapter is
    kept only when its intersection with ``[run_start, run_end]`` is longer
    than one second.

    Returns:
        A list of ``(chapter, start_off_s, duration_s)`` where
        ``start_off_s`` is the offset into the chapter at which the run
        begins (0.0 when the run started before the chapter) and
        ``duration_s`` is the length of the intersection.
    """
    selected = []
    ordered = sorted(chapters, key=lambda c: c["start_epoch"])
    for chapter in ordered:
        chapter_start = chapter["start_epoch"]
        window_start = max(run_start, chapter_start)
        window_end = min(run_end, chapter["end_epoch"])
        overlap = window_end - window_start
        if overlap <= 1.0:
            # No overlap, or a sliver too short to be worth cutting.
            continue
        offset_in_chapter = max(0.0, run_start - chapter_start)
        selected.append((chapter, offset_in_chapter, overlap))
    return selected
|
||||||
|
|
||||||
|
|
||||||
|
def cut_clip(src: Path, start_off: float, duration: float, dst: Path) -> None:
    """Extract ``duration`` seconds of ``src`` starting at ``start_off`` into ``dst``.

    Streams are copied, not re-encoded (``-c copy``), so the cut is
    keyframe-aligned rather than frame-exact. Audio is dropped (``-an``) and
    any existing ``dst`` is overwritten (``-y``). Both times are passed to
    ffmpeg with millisecond precision.

    Raises:
        RuntimeError: propagated from ``run_ff`` when ffmpeg fails.
    """
    base = ["ffmpeg", "-y", "-loglevel", "error"]
    # -ss precedes -i, i.e. it is given as an input option.
    seek_and_input = ["-ss", f"{start_off:.3f}", "-i", str(src)]
    length = ["-t", f"{duration:.3f}"]
    output_opts = ["-c", "copy", "-avoid_negative_ts", "make_zero", "-an"]
    run_ff(base + seek_and_input + length + output_opts + [str(dst)])
|
||||||
|
|
||||||
|
|
||||||
|
def concat_demux(parts: list[Path], dst: Path) -> None:
    """Join ``parts`` into ``dst`` without re-encoding.

    Behaviour by number of parts:
      * none  - nothing is written and the function returns immediately;
      * one   - the file is copied with ``shutil.copy2`` (ffmpeg is not run);
      * many  - ffmpeg's concat demuxer is run with ``-c copy``,
                ``-movflags +faststart`` and ``-an``.

    Args:
        parts: Input files, in playback order.
        dst: Output file; overwritten if it exists.

    Raises:
        RuntimeError: propagated from ``run_ff`` when ffmpeg fails.
    """
    if not parts:
        return
    if len(parts) == 1:
        shutil.copy2(parts[0], dst)
        return

    list_fh = tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False)
    listfile = list_fh.name
    try:
        with list_fh:
            for p in parts:
                # Inside single quotes the concat list format has no escape
                # character, so a literal ' must be written as '\'' (close
                # quote, escaped quote, reopen quote).
                escaped = str(p.resolve()).replace("'", "'\\''")
                list_fh.write(f"file '{escaped}'\n")
        cmd = [
            "ffmpeg", "-y", "-loglevel", "error",
            # -safe 0: the list contains absolute paths.
            "-f", "concat", "-safe", "0",
            "-i", listfile,
            "-c", "copy",
            "-movflags", "+faststart",
            "-an",
            str(dst),
        ]
        run_ff(cmd)
    finally:
        # delete=False above, so the list file is removed here, including
        # when writing it or running ffmpeg fails.
        os.unlink(listfile)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_sources(clips: list[tuple[dict, float, float]], prefer_source: str) -> list[tuple[Path, float, float, str]]:
    """Choose the file to cut for every clip of one output, all of one kind.

    LRV proxies are used only when ``prefer_source`` is ``"lrv"`` and every
    chapter has one. Otherwise all clips come from the MP4 originals, because
    the parts are later joined with ``-c copy`` and mixing proxy and original
    streams in one concat does not produce a usable file.

    Returns:
        ``[(src_path, start_off_s, duration_s, tag)]`` with ``tag`` being
        ``"lrv"`` or ``"mp4"`` (identical for every element).
    """
    originals = [Path(ch["filepath"]) for ch, _, _ in clips]
    proxies = [lrv_for_chapter(p) for p in originals] if prefer_source == "lrv" else []
    use_lrv = bool(proxies) and all(p is not None for p in proxies)
    if proxies and not use_lrv and any(p is not None for p in proxies):
        sys.stderr.write(
            f"[warn] LRV proxy missing for some of {len(clips)} chapter(s); "
            "using MP4 originals for all of them\n"
        )
    sources = proxies if use_lrv else originals
    tag = "lrv" if use_lrv else "mp4"
    return [(src, soff, dur, tag) for src, (_, soff, dur) in zip(sources, clips)]


def main():
    """Trim every run's video per camera and build the per-camera ``ours`` cut.

    Steps:
      1. Read ``02_runs.json`` and ``03_video_index.json`` from
         ``<data-root>/<mission>``.
      2. For each run (chronological) and each ``gp`` value in the index,
         cut the overlapping chapter ranges and concatenate them into
         ``<run_id>_<auv>_<gp>.mp4`` under ``OUT_ROOT/<mission>``.
      3. Concatenate the per-run files of each ``gp`` into ``ours_<gp>.mp4``.
      4. Write the manifest ``03b_trim_runs.json`` next to the inputs and
         point ``<data-root>/<mission>/03b_trim_runs`` at the output
         directory via a symlink.

    With ``--skip-existing`` a non-empty per-run output is reused as is; the
    ``ours_<gp>.mp4`` files are always rebuilt.

    Raises:
        RuntimeError: propagated from ``run_ff`` when an ffmpeg call fails.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--mission", required=True)
    ap.add_argument("--data-root", default="/home/cosma/cosma-qc/data")
    ap.add_argument("--skip-existing", action="store_true")
    ap.add_argument("--prefer-source", choices=["lrv", "mp4"], default="lrv",
                    help="lrv = use .LRV proxy (default, fast); mp4 = use 4K originals")
    args = ap.parse_args()

    mission = args.mission
    data_dir = Path(args.data_root) / mission

    runs = json.loads((data_dir / "02_runs.json").read_text())["runs"]
    vidx = json.loads((data_dir / "03_video_index.json").read_text())
    videos = vidx["videos"]

    # Group chapters by (auv, gp) so each run only looks at its own footage.
    by_auv_gp: dict[tuple[str, str], list[dict]] = defaultdict(list)
    for v in videos:
        by_auv_gp[(v["auv"], v["gp"])].append(v)
    all_gps = sorted({v["gp"] for v in videos})

    out_dir = OUT_ROOT / mission
    out_dir.mkdir(parents=True, exist_ok=True)
    tmp_dir = out_dir / "_tmp"
    tmp_dir.mkdir(exist_ok=True)

    # Replace whatever sits at <data_dir>/03b_trim_runs with a symlink to the
    # real output directory.
    link = data_dir / "03b_trim_runs"
    if link.is_symlink():
        link.unlink()
    elif link.exists():
        shutil.rmtree(link)
    link.symlink_to(out_dir)

    manifest = {
        "mission": mission,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "output_root": str(out_dir),
        "source": args.prefer_source,
        "runs": [],
        "ours": {},
    }

    runs_by_chrono = sorted(runs, key=lambda r: r["start_epoch"])
    # gp -> [(run start, per-run file, label)]; sorted later for the ours cut.
    ours_parts: dict[str, list[tuple[float, Path, str]]] = defaultdict(list)

    for run in runs_by_chrono:
        run_id = run["run_id"]
        auv = run["auv"]
        r_start = run["start_epoch"]
        r_end = run["end_epoch"]
        run_entry = {"run_id": run_id, "auv": auv, "duration_s": run["duration_s"], "outputs": []}

        for gp in all_gps:
            chapters = by_auv_gp.get((auv, gp), [])
            if not chapters:
                continue
            clips = overlap_clips(r_start, r_end, chapters)
            if not clips:
                continue

            out_name = f"{run_id}_{auv}_{gp}.mp4"
            out_path = out_dir / out_name
            if args.skip_existing and out_path.exists() and out_path.stat().st_size > 0:
                print(f"[skip] {out_name}", flush=True)
            else:
                resolved = _resolve_sources(clips, args.prefer_source)
                src_tags = [tag for _, _, _, tag in resolved]

                print(
                    f"[cut ] {out_name} chapters={len(resolved)} src={','.join(src_tags)}",
                    flush=True,
                )
                tmp_parts: list[Path] = []
                # Build the output under _tmp and move it into place only on
                # success, so a failed run never leaves a truncated file that
                # --skip-existing would later accept.
                staged_out = tmp_dir / out_name
                try:
                    for i, (src, soff, dur, _) in enumerate(resolved):
                        tp = tmp_dir / f"{run_id}_{auv}_{gp}_p{i:02d}.mp4"
                        # Registered before cutting so a partially written
                        # part is also removed if ffmpeg fails.
                        tmp_parts.append(tp)
                        cut_clip(src, soff, dur, tp)
                    concat_demux(tmp_parts, staged_out)
                    os.replace(staged_out, out_path)
                finally:
                    for p in tmp_parts:
                        p.unlink(missing_ok=True)
                    staged_out.unlink(missing_ok=True)

            sz_mb = round(out_path.stat().st_size / 1024 / 1024, 1)
            run_entry["outputs"].append({"gp": gp, "file": out_name, "size_mb": sz_mb})
            ours_parts[gp].append((r_start, out_path, f"{run_id} {auv}"))

        manifest["runs"].append(run_entry)

    # NOTE(review): per-run files of one gp may have been cut from different
    # source kinds (e.g. one run fell back to MP4, or a reused file came from
    # an earlier invocation with another --prefer-source); this concat does
    # not check for that.
    for gp, parts in ours_parts.items():
        parts.sort()
        ordered_paths = [p for _, p, _ in parts]
        ours_path = out_dir / f"ours_{gp}.mp4"
        print(f"[ours] {ours_path.name} <- {len(ordered_paths)} clip(s)", flush=True)
        concat_demux(ordered_paths, ours_path)
        sz_mb = round(ours_path.stat().st_size / 1024 / 1024, 1)
        manifest["ours"][gp] = {
            "file": ours_path.name,
            "size_mb": sz_mb,
            "segments": [lbl for _, _, lbl in parts],
        }

    (data_dir / "03b_trim_runs.json").write_text(json.dumps(manifest, indent=2))
    print(f"\n[done] manifest: {data_dir / '03b_trim_runs.json'}", flush=True)
    print(f"[done] outputs: {out_dir}", flush=True)

    # Best effort: the scratch directory is removed only if it is empty.
    try:
        tmp_dir.rmdir()
    except OSError:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
# Run the stage only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user