update viser
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ demo_render/
|
|||||||
CLAUDE.md
|
CLAUDE.md
|
||||||
.claude/
|
.claude/
|
||||||
.agents/
|
.agents/
|
||||||
|
skyseg.onnx
|
||||||
19
README.md
19
README.md
@@ -115,13 +115,30 @@ python demo.py --model_path /path/to/checkpoint.pt \
|
|||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### With Sky Masking
|
### Sky Masking
|
||||||
|
|
||||||
|
Sky masking uses an ONNX sky segmentation model to filter out sky points from the reconstructed point cloud, which improves visualization quality for outdoor scenes.
|
||||||
|
|
||||||
|
**Setup:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install onnxruntime (required)
|
||||||
|
pip install onnxruntime # CPU
|
||||||
|
# or
|
||||||
|
pip install onnxruntime-gpu # GPU (faster for large image sets)
|
||||||
|
```
|
||||||
|
|
||||||
|
The sky segmentation model (`skyseg.onnx`) will be automatically downloaded from [HuggingFace](https://huggingface.co/JianyuanWang/skyseg/resolve/main/skyseg.onnx) on first use.
|
||||||
|
|
||||||
|
**Usage:**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python demo.py --model_path /path/to/checkpoint.pt \
|
python demo.py --model_path /path/to/checkpoint.pt \
|
||||||
--image_folder /path/to/images/ --mask_sky
|
--image_folder /path/to/images/ --mask_sky
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Sky masks are cached in `<image_folder>_sky_masks/` so subsequent runs skip regeneration.
|
||||||
|
|
||||||
### Without FlashInfer (SDPA fallback)
|
### Without FlashInfer (SDPA fallback)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
16
demo.py
16
demo.py
@@ -138,9 +138,8 @@ _BATCHED_NDIMS = {
|
|||||||
"world_points_conf": 4,
|
"world_points_conf": 4,
|
||||||
"extrinsic": 4,
|
"extrinsic": 4,
|
||||||
"intrinsic": 4,
|
"intrinsic": 4,
|
||||||
"chunk_sim3_scales": 2,
|
"chunk_scales": 2,
|
||||||
"chunk_sim3_poses": 4,
|
"chunk_transforms": 4,
|
||||||
"chunk_se3_poses": 4,
|
|
||||||
"images": 5,
|
"images": 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,14 +255,13 @@ def main():
|
|||||||
# Windowed options
|
# Windowed options
|
||||||
parser.add_argument("--window_size", type=int, default=64, help="Frames per window (windowed mode)")
|
parser.add_argument("--window_size", type=int, default=64, help="Frames per window (windowed mode)")
|
||||||
parser.add_argument("--overlap_size", type=int, default=16, help="Overlap between windows")
|
parser.add_argument("--overlap_size", type=int, default=16, help="Overlap between windows")
|
||||||
parser.add_argument("--sim3", action="store_true", default=True, help="Use Sim(3) alignment between windows")
|
|
||||||
parser.add_argument("--no_sim3", dest="sim3", action="store_false", help="Disable Sim(3), use SE(3) instead")
|
|
||||||
|
|
||||||
# Visualization
|
# Visualization
|
||||||
parser.add_argument("--port", type=int, default=8080)
|
parser.add_argument("--port", type=int, default=8080)
|
||||||
parser.add_argument("--conf_threshold", type=float, default=1.0)
|
parser.add_argument("--conf_threshold", type=float, default=1.5)
|
||||||
parser.add_argument("--downsample_factor", type=int, default=10)
|
parser.add_argument("--downsample_factor", type=int, default=10)
|
||||||
parser.add_argument("--point_size", type=float, default=0.005)
|
parser.add_argument("--point_size", type=float, default=0.0007)
|
||||||
parser.add_argument("--mask_sky", action="store_true", help="Apply sky segmentation to filter out sky points")
|
parser.add_argument("--mask_sky", action="store_true", help="Apply sky segmentation to filter out sky points")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -314,8 +312,6 @@ def main():
|
|||||||
window_size=args.window_size,
|
window_size=args.window_size,
|
||||||
overlap_size=args.overlap_size,
|
overlap_size=args.overlap_size,
|
||||||
num_scale_frames=args.num_scale_frames,
|
num_scale_frames=args.num_scale_frames,
|
||||||
sim3=args.sim3,
|
|
||||||
se3=not args.sim3,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
t_infer = time.time() - t0
|
t_infer = time.time() - t0
|
||||||
@@ -330,7 +326,7 @@ def main():
|
|||||||
viewer = PointCloudViewer(
|
viewer = PointCloudViewer(
|
||||||
pred_dict=prepare_for_visualization(predictions, images_cpu),
|
pred_dict=prepare_for_visualization(predictions, images_cpu),
|
||||||
port=args.port,
|
port=args.port,
|
||||||
init_conf_threshold=args.conf_threshold,
|
vis_threshold=args.conf_threshold,
|
||||||
downsample_factor=args.downsample_factor,
|
downsample_factor=args.downsample_factor,
|
||||||
point_size=args.point_size,
|
point_size=args.point_size,
|
||||||
mask_sky=args.mask_sky,
|
mask_sky=args.mask_sky,
|
||||||
|
|||||||
@@ -169,6 +169,11 @@ class FlashInferKVCacheManager:
|
|||||||
# Frame counter per block (determines scale vs window routing)
|
# Frame counter per block (determines scale vs window routing)
|
||||||
self.frame_count: List[int] = [0] * num_blocks
|
self.frame_count: List[int] = [0] * num_blocks
|
||||||
|
|
||||||
|
# Deferred eviction support for flow-based keyframe selection.
|
||||||
|
# When True, evict_frames() becomes a no-op; caller must later call
|
||||||
|
# execute_deferred_eviction() or rollback_last_frame().
|
||||||
|
self._defer_eviction: bool = False
|
||||||
|
|
||||||
# ── FlashInfer wrapper ───────────────────────────────────────────────
|
# ── FlashInfer wrapper ───────────────────────────────────────────────
|
||||||
# plan() is called once per frame step (block_idx == 0).
|
# plan() is called once per frame step (block_idx == 0).
|
||||||
# run() is called per layer, reusing the same aux structures.
|
# run() is called per layer, reusing the same aux structures.
|
||||||
@@ -237,11 +242,64 @@ class FlashInferKVCacheManager:
|
|||||||
Special pages are NEVER evicted.
|
Special pages are NEVER evicted.
|
||||||
Scale pages are NEVER evicted.
|
Scale pages are NEVER evicted.
|
||||||
Only live_window_patch_pages beyond `sliding_window` are recycled.
|
Only live_window_patch_pages beyond `sliding_window` are recycled.
|
||||||
|
|
||||||
|
When ``_defer_eviction`` is True, this method is a no-op. The caller
|
||||||
|
is expected to later call ``execute_deferred_eviction()`` (keep frame)
|
||||||
|
or ``rollback_last_frame()`` (discard frame).
|
||||||
"""
|
"""
|
||||||
|
if self._defer_eviction:
|
||||||
|
return
|
||||||
while len(self.live_window_patch_pages[block_idx]) > sliding_window:
|
while len(self.live_window_patch_pages[block_idx]) > sliding_window:
|
||||||
old_page = self.live_window_patch_pages[block_idx].popleft()
|
old_page = self.live_window_patch_pages[block_idx].popleft()
|
||||||
self.free_patch_pages[block_idx].append(old_page)
|
self.free_patch_pages[block_idx].append(old_page)
|
||||||
|
|
||||||
|
def execute_deferred_eviction(
    self,
    block_idx: int,
    scale_frames: int,
    sliding_window: int,
    **kwargs,
) -> None:
    """Run the eviction that was skipped while ``_defer_eviction`` was True.

    Recycles the oldest live window patch pages of *block_idx* until at most
    ``sliding_window`` of them remain. Pages are taken from the left (oldest)
    end of ``live_window_patch_pages[block_idx]`` and appended, in that same
    order, to ``free_patch_pages[block_idx]``.

    Unlike ``evict_frames()``, this method does not consult
    ``_defer_eviction``; it always performs the eviction.

    Args:
        block_idx: Index of the block whose window pages are trimmed.
        scale_frames: Accepted but not used by this method.
            NOTE(review): presumably kept so the signature mirrors
            ``evict_frames()`` -- confirm against the callers.
        sliding_window: Maximum number of live window patch pages to keep.
        **kwargs: Accepted and ignored.
    """
    # Resolve the per-block containers once instead of on every iteration.
    live_pages = self.live_window_patch_pages[block_idx]
    free_pages = self.free_patch_pages[block_idx]

    # range() over a non-positive excess is empty, so nothing is evicted
    # when the window is not over capacity.
    for _ in range(len(live_pages) - sliding_window):
        free_pages.append(live_pages.popleft())
|
||||||
|
|
||||||
|
def rollback_last_frame(self, block_idx: int) -> None:
    """Undo the most recent ``append_frame()`` for *block_idx*.

    This reverses all three sub-operations of ``append_frame``:
    patch page allocation, special-token write, and frame_count increment.
    It must be called **before** any eviction for that frame (i.e. while
    ``_defer_eviction`` is True or before ``evict_frames`` is called).

    Both invariants are checked up front, before any state is touched, so a
    failed check leaves the cache exactly as it was. The checks are explicit
    ``raise`` statements rather than ``assert`` so they still run under
    ``python -O``.

    Args:
        block_idx: Index of the block whose last frame is rolled back.

    Raises:
        AssertionError: If ``frame_count[block_idx]`` is 0, or if removing
            ``num_special_tokens`` would make ``special_token_count``
            negative.
        IndexError: If the patch-page container the frame was routed to is
            empty (raised by ``pop()`` before anything has been modified).
    """
    frames = self.frame_count[block_idx]
    if frames <= 0:
        raise AssertionError(
            f"block {block_idx}: cannot rollback, frame_count is 0"
        )

    n = self.num_special_tokens
    old_count = self.special_token_count[block_idx]
    new_count = old_count - n
    if new_count < 0:
        raise AssertionError(
            f"block {block_idx}: special_token_count underflow "
            f"({old_count} - {n})"
        )

    # 1) Undo patch page -- pop from whichever container it was routed to.
    #    Frames beyond ``scale_frames`` live in the sliding window; earlier
    #    ones are scale frames.
    if frames > self.scale_frames:
        page_id = self.live_window_patch_pages[block_idx].pop()
    else:
        page_id = self.scale_patch_pages[block_idx].pop()
    self.free_patch_pages[block_idx].append(page_id)

    # 2) Undo special tokens: release every special page that is no longer
    #    needed to hold ``new_count`` tokens (ceil(0 / page_size) == 0).
    new_num_pages = math.ceil(new_count / self.page_size)
    special_pages = self.all_special_pages[block_idx]
    free_special = self.free_special_pages[block_idx]
    while len(special_pages) > new_num_pages:
        free_special.append(special_pages.pop())
    self.special_token_count[block_idx] = new_count

    # 3) Decrement frame count
    self.frame_count[block_idx] = frames - 1
|
||||||
|
|
||||||
def _gather_kv(self, block_idx: int):
|
def _gather_kv(self, block_idx: int):
|
||||||
"""
|
"""
|
||||||
Gather all visible K and V tokens from the paged cache into dense tensors.
|
Gather all visible K and V tokens from the paged cache into dense tensors.
|
||||||
|
|||||||
@@ -1486,7 +1486,7 @@ class PointCloudViewer:
|
|||||||
aspect=aspect,
|
aspect=aspect,
|
||||||
wxyz=q,
|
wxyz=q,
|
||||||
position=t,
|
position=t,
|
||||||
scale=0.1,
|
scale=0.03,
|
||||||
color=camera_color_rgb,
|
color=camera_color_rgb,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user