ingest+dispatcher — support acquisition depuis remote host via SSH
- ingest.py : --remote-host <alias> pour scanner/exiftool via SSH, stocke les chemins avec le préfixe "alias:" pour que le worker sache les puller directement.
- dispatcher.py : scp_to_worker détecte "host:path" et fait un pull remote (worker → source host) au lieu du double hop via le dispatcher.
- _path_basename gère les chemins préfixés pour ffmpeg.

Permet d'ingérer les vidéos depuis n'importe quelle machine accessible en SSH sans faire passer 145 GB par le conteneur FastAPI.
This commit is contained in:
@@ -121,6 +121,24 @@ def count_frames(worker: dict, frames_dir: str) -> int:
|
||||
|
||||
|
||||
def scp_to_worker(local_path: str, worker: dict, remote_path: str):
|
||||
"""Copy a file to the worker.
|
||||
|
||||
`local_path` may be either:
|
||||
- a path on the dispatcher host (standard scp from here)
|
||||
- "host:abs_path" — pulled by the worker directly from `host`
|
||||
(avoids routing bytes through the dispatcher).
|
||||
"""
|
||||
if ":" in local_path and not local_path.startswith("/"):
|
||||
src_host, src_path = local_path.split(":", 1)
|
||||
# Pull from source host directly on the worker
|
||||
pull_cmd = (
|
||||
f"scp -o BatchMode=yes {shlex.quote(src_host)}:{shlex.quote(src_path)} "
|
||||
f"{shlex.quote(remote_path)}"
|
||||
)
|
||||
rc, _, err = ssh(worker["ssh_alias"], pull_cmd, timeout=7200)
|
||||
if rc != 0:
|
||||
raise RuntimeError(f"remote scp ({src_host}→{worker['host']}) failed: {err[:200]}")
|
||||
return
|
||||
r = subprocess.run(
|
||||
["scp", "-o", "BatchMode=yes", local_path, f"{worker['ssh_alias']}:{remote_path}"],
|
||||
capture_output=True, timeout=1800,
|
||||
@@ -129,6 +147,12 @@ def scp_to_worker(local_path: str, worker: dict, remote_path: str):
|
||||
raise RuntimeError(f"scp failed: {r.stderr.decode()[:200]}")
|
||||
|
||||
|
||||
def _path_basename(p: str) -> str:
|
||||
if ":" in p and not p.startswith("/"):
|
||||
return Path(p.split(":", 1)[1]).name
|
||||
return Path(p).name
|
||||
|
||||
|
||||
def do_extract(job: sqlite3.Row, worker: dict) -> str:
|
||||
videos = json.loads(job["video_paths"])
|
||||
frames_dir = f"{worker['frames_dir']}/job_{job['id']}"
|
||||
@@ -138,10 +162,10 @@ def do_extract(job: sqlite3.Row, worker: dict) -> str:
|
||||
vf = f"fps={FPS},scale={IMG_W}:{IMG_H}"
|
||||
pattern = f"{frames_dir}/frame_%06d.jpg"
|
||||
# Copy video to worker if it doesn't exist there
|
||||
worker_src = f"{frames_dir}/src_{Path(v).name}"
|
||||
worker_src = f"{frames_dir}/src_{_path_basename(v)}"
|
||||
rc_check = ssh(worker["ssh_alias"], f"test -f {shlex.quote(worker_src)}")[0]
|
||||
if rc_check != 0:
|
||||
print(f" scp {Path(v).name} → {worker['host']}...")
|
||||
print(f" scp {_path_basename(v)} → {worker['host']}...")
|
||||
scp_to_worker(v, worker, worker_src)
|
||||
cmd = (
|
||||
f"ffmpeg -hide_banner -loglevel error -i {shlex.quote(worker_src)} "
|
||||
|
||||
Reference in New Issue
Block a user