diff --git a/demo.py b/demo.py index 387aae2..b684344 100644 --- a/demo.py +++ b/demo.py @@ -254,7 +254,7 @@ def main(): default=1, help="Streaming only. Every N-th frame after scale frames is kept as a keyframe. 1 = every frame.", ) - parser.add_argument("--kv_cache_sliding_window", type=int, default=64) + parser.add_argument("--kv_cache_sliding_window", type=int, default=16) parser.add_argument("--kv_cache_scale_frames", type=int, default=8) parser.add_argument("--use_sdpa", action="store_true", default=False, help="Use SDPA backend (no flashinfer needed). Default: FlashInfer") @@ -340,8 +340,7 @@ def main(): num_scale_frames=args.num_scale_frames, ) - t_infer = time.time() - t0 - print(f"Inference done: {t_infer:.1f}s ({num_frames / t_infer:.1f} FPS)") + print(f"Inference done in {time.time() - t0:.1f}s") # ── Post-process ───────────────────────────────────────────────────────── predictions, images_cpu = postprocess(predictions, images)