- dispatcher: scp du MP4 source vers le worker avant ffmpeg (les chemins .82 ne sont pas accessibles côté .87)
- dispatcher: wrapper shell autour de demo.py pour killer viser dès que le PLY est écrit (setsid + pkill -f frames_dir)
- dispatcher: PLY_ok fallback — accepte rc!=0 si le PLY existe et a une taille > 0
- dispatcher: fallback frame_count abaissé à 150 pour l'estimation VRAM
- ingest: strip du suffixe timezone (+00:00) des timestamps exiftool QuickTimeUTC=1

Testé bout-en-bout sur GX010001.MP4 (70 frames, 10.6M pts PLY, VRAM peak 9.4 GB, kill viser OK).

174 lines · 5.9 KiB · Python
#!/usr/bin/env python3
"""Scan an acquisition directory, group GoPro MP4s into continuous segments,
and insert jobs into the cosma-qc DB.

Usage:
    python3 ingest.py /mnt/portablessd/COSMA-<date>/ --name "La Ciotat 8 avril" [--gap-min 5]

Directory layout expected (we saw this from the real SSD):

    <root>/media/gopro{1,2}/GP{1,2}_AUV{209,210}/GX*.MP4

The AUV tag and GoPro id come from folder names. The serial is read via
exiftool (falls back to folder name if unavailable). Continuous segments are
derived from EXIF CreateDate timestamps with a configurable gap threshold.
"""
from __future__ import annotations

import argparse
import json
import os
import re
import sqlite3
import subprocess
from datetime import datetime, timedelta
from pathlib import Path

# Job-queue database; override with COSMA_QC_DB for tests / dev machines.
DB_PATH = Path(os.environ.get("COSMA_QC_DB", "/var/lib/cosma-qc/jobs.db"))
# Matches camera folder names such as "GP1_AUV209" (case-insensitive).
FOLDER_RE = re.compile(r"GP(?P<gopro>\d+)_AUV(?P<auv>\d+)", re.I)
|
||
|
||
|
||
def exif_create_date(path: Path) -> datetime | None:
    """Return the EXIF CreateDate of *path* as a naive datetime, or None.

    Runs exiftool with ``-api QuickTimeUTC=1`` so QuickTime timestamps come
    back in UTC. exiftool may append a timezone suffix (e.g. "+00:00"); it
    is stripped before parsing. Any failure — exiftool missing, timeout,
    nonzero exit, empty or unparseable output — yields None; callers treat
    None as "no usable timestamp".
    """
    try:
        out = subprocess.check_output(
            ["exiftool", "-s3", "-CreateDate", "-api", "QuickTimeUTC=1", str(path)],
            stderr=subprocess.DEVNULL, text=True, timeout=10,
        ).strip()
        if not out:
            return None
        # Strip timezone suffix (+HH:MM or -HH:MM) if present.
        # Fix: use the module-level `re` instead of re-importing it locally.
        out = re.sub(r'[+-]\d{2}:\d{2}$', '', out).strip()
        return datetime.strptime(out, "%Y:%m:%d %H:%M:%S")
    except Exception:
        # Best-effort by design: any error means "no timestamp available".
        return None
|
||
|
||
|
||
def exif_duration_s(path: Path) -> float | None:
    """Return the duration of *path* in seconds via exiftool, or None.

    ``-Duration#`` asks exiftool for the raw numeric value. Any failure
    (missing exiftool, timeout, nonzero exit, empty or non-numeric output)
    yields None.
    """
    cmd = ["exiftool", "-s3", "-Duration#", str(path)]
    try:
        raw = subprocess.check_output(
            cmd, stderr=subprocess.DEVNULL, text=True, timeout=10,
        ).strip()
        if not raw:
            return None
        return float(raw)
    except Exception:
        return None
|
||
|
||
|
||
def exif_serial(path: Path) -> str | None:
    """Return the first non-empty serial number exiftool reports for *path*.

    Queries both SerialNumber and CameraSerialNumber tags; exiftool prints
    one value per line, so the first non-blank line wins. Returns None on
    any failure or when no serial tag is present.
    """
    try:
        raw = subprocess.check_output(
            ["exiftool", "-s3", "-SerialNumber", "-CameraSerialNumber", str(path)],
            stderr=subprocess.DEVNULL, text=True, timeout=10,
        )
    except Exception:
        return None
    candidates = (ln.strip() for ln in raw.strip().splitlines())
    return next((ln for ln in candidates if ln), None)
|
||
|
||
|
||
def group_segments(videos: list[dict], gap_min: int) -> list[dict]:
    """Group consecutive videos into segments when gap between end-of-A and
    start-of-B is below `gap_min` minutes."""
    ordered = sorted(videos, key=lambda v: v["start"])
    max_gap = timedelta(minutes=gap_min)

    # First pass: build runs of videos whose start follows the previous
    # video's end (start + duration) within the allowed gap.
    runs: list[list[dict]] = []
    for video in ordered:
        if runs:
            prev = runs[-1][-1]
            prev_end = prev["start"] + timedelta(seconds=prev["duration"] or 0)
            if video["start"] - prev_end <= max_gap:
                runs[-1].append(video)
                continue
        runs.append([video])

    # Second pass: summarize each run into a segment record.
    result = []
    for run in runs:
        seg_start = run[0]["start"]
        seg_end = run[-1]["start"] + timedelta(seconds=run[-1]["duration"] or 0)
        result.append({
            "start": seg_start,
            "end": seg_end,
            "label": f"{seg_start.strftime('%H:%M')}–{seg_end.strftime('%H:%M')}",
            "videos": [str(v["path"]) for v in run],
        })
    return result
|
||
|
||
|
||
def scan(root: Path) -> dict:
    """Return {(auv, gopro_tag): {serial, videos[]}}"""
    grouped: dict[tuple[str, str], dict] = {}
    for mp4 in root.rglob("*.MP4"):
        match = FOLDER_RE.search(str(mp4.parent))
        if match is None:
            continue
        key = (f"AUV{match.group('auv')}", f"GP{match.group('gopro')}")
        start = exif_create_date(mp4)
        dur = exif_duration_s(mp4)
        if not start:
            # No usable timestamp means we cannot place the file in a segment.
            print(f" [skip] no CreateDate: {mp4}")
            continue
        serial = exif_serial(mp4)
        entry = grouped.setdefault(key, {"serial": serial, "videos": []})
        # Backfill the serial if an earlier file in this group lacked one.
        if serial and not entry["serial"]:
            entry["serial"] = serial
        entry["videos"].append({"path": mp4, "start": start, "duration": dur or 0})
    return grouped
|
||
|
||
|
||
def main():
    """CLI entry point: scan a root directory and enqueue per-segment jobs.

    With --dry-run, only prints what would be inserted; the database (and
    its parent directory) is never created or touched in that mode.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("root", type=Path)
    ap.add_argument("--name", required=True, help="Acquisition name")
    ap.add_argument("--gap-min", type=int, default=5,
                    help="Max gap between videos in one segment")
    ap.add_argument("--dry-run", action="store_true")
    args = ap.parse_args()

    if not args.root.exists():
        raise SystemExit(f"root not found: {args.root}")

    print(f"Scanning {args.root}...")
    grouped = scan(args.root)
    if not grouped:
        print("No (auv, gopro) folders found — expected GPx_AUVyyy layout.")
        return

    # Fix: previously the DB directory/file were created even under
    # --dry-run (sqlite3.connect creates the file). Only open the DB for
    # a real run, and always close the connection when done.
    conn = None
    acq_id = -1
    if not args.dry_run:
        DB_PATH.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(DB_PATH, isolation_level=None)
        conn.execute("PRAGMA foreign_keys=ON")
        conn.row_factory = sqlite3.Row
        cur = conn.execute(
            "INSERT INTO acquisitions (name, source_path) VALUES (?, ?)",
            (args.name, str(args.root)),
        )
        acq_id = cur.lastrowid
        print(f"Created acquisition id={acq_id}")

    total_jobs = 0
    try:
        for (auv, gopro_tag), info in sorted(grouped.items()):
            serial = info["serial"] or gopro_tag
            segs = group_segments(info["videos"], args.gap_min)
            print(f"\n{auv} / {gopro_tag} (serial={serial}) — {len(info['videos'])} videos → {len(segs)} segments")
            for seg in segs:
                dur_min = (seg["end"] - seg["start"]).total_seconds() / 60
                print(f" · {seg['label']} ({dur_min:.1f} min, {len(seg['videos'])} files)")
                if args.dry_run:
                    continue
                conn.execute("""
                    INSERT INTO jobs (acquisition_id, auv, gopro_serial, segment_label,
                                      video_paths, status)
                    VALUES (?, ?, ?, ?, ?, 'queued')
                """, (acq_id, auv, serial, seg["label"], json.dumps(seg["videos"])))
                total_jobs += 1
    finally:
        if conn is not None:
            conn.close()

    print(f"\nInserted {total_jobs} jobs.")
|
||
|
||
|
||
# Script entry point: run the CLI when executed directly, not on import.
if __name__ == "__main__":
    main()
|