from math import isqrt
from typing import Literal

import torch
from diff_surfel_rasterization import (
    GaussianRasterizationSettings,
    GaussianRasterizer,
)
from einops import einsum, rearrange, repeat
from jaxtyping import Float
from torch import Tensor

from ...geometry.projection import get_fov, homogenize_points


def get_projection_matrix(
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    fov_x: Float[Tensor, " batch"],
    fov_y: Float[Tensor, " batch"],
) -> Float[Tensor, "batch 4 4"]:
    """Maps points in the viewing frustum to (-1, 1) on the X/Y axes and (0, 1) on
    the Z axis. Differs from the OpenGL version in that Z doesn't have range
    (-1, 1) after transformation and that Z is flipped.
    """
    tan_fov_x = (0.5 * fov_x).tan()
    tan_fov_y = (0.5 * fov_y).tan()

    top = tan_fov_y * near
    bottom = -top
    right = tan_fov_x * near
    left = -right

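    # In row-major block form, the matrix assembled below is
    #     [ 2n/(r-l)     0       (r+l)/(r-l)        0      ]
    #     [    0      2n/(t-b)   (t+b)/(t-b)        0      ]
    #     [    0         0        f/(f-n)      -f*n/(f-n)  ]
    #     [    0         0           1              0      ]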
    (b,) = near.shape
    result = torch.zeros((b, 4, 4), dtype=torch.float32, device=near.device)
    result[:, 0, 0] = 2 * near / (right - left)
    result[:, 1, 1] = 2 * near / (top - bottom)
    result[:, 0, 2] = (right + left) / (right - left)
    result[:, 1, 2] = (top + bottom) / (top - bottom)
    result[:, 3, 2] = 1
    result[:, 2, 2] = far / (far - near)
    result[:, 2, 3] = -(far * near) / (far - near)
    return result


def render_cuda_2dgs(
    extrinsics: Float[Tensor, "batch 4 4"],
    intrinsics: Float[Tensor, "batch 3 3"],
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    image_shape: tuple[int, int],
    background_color: Float[Tensor, "batch 3"],
    gaussian_means: Float[Tensor, "batch gaussian 3"],
    gaussian_covariances: Float[Tensor, "batch gaussian 3 3"],
    gaussian_sh_coefficients: Float[Tensor, "batch gaussian 3 d_sh"],
    gaussian_opacities: Float[Tensor, "batch gaussian"],
    scale_invariant: bool = True,
    use_sh: bool = True,
    # cam_rot_delta: Float[Tensor, "batch 3"] | None = None,
    # cam_trans_delta: Float[Tensor, "batch 3"] | None = None,
) -> tuple[
    Float[Tensor, "batch 3 height width"],
    Float[Tensor, "batch _ height width"],
]:
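    """Render 2D Gaussian surfels (2DGS) for each batch element with the
    diff-surfel-rasterization CUDA backend. Returns the rendered RGB images and
    the rasterizer's stacked auxiliary maps (per-pixel quantities such as depth,
    normals, and alpha; see the 2DGS rasterizer for the exact channel layout).
    """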
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    # Make sure everything is in a range where numerical issues don't appear by
    # rescaling the scene so that the near plane sits at unit distance.
    if scale_invariant:
        scale = 1 / near
        extrinsics = extrinsics.clone()
        extrinsics[..., :3, 3] = extrinsics[..., :3, 3] * scale[:, None]
        gaussian_covariances = gaussian_covariances * (scale[:, None, None, None] ** 2)
        gaussian_means = gaussian_means * scale[:, None, None]
        near = near * scale
        far = far * scale

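    # A degree-d spherical-harmonics expansion has (d + 1) ** 2 coefficients per
    # channel, so the degree can be recovered with isqrt.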
    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    b, _, _ = extrinsics.shape
    h, w = image_shape

    fov_x, fov_y = get_fov(intrinsics).unbind(dim=-1)
    tan_fov_x = (0.5 * fov_x).tan()
    tan_fov_y = (0.5 * fov_y).tan()

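    # The CUDA rasterizer expects transposed matrices (a column-major convention
    # inherited from the original 3DGS code), hence "b i j -> b j i".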
    projection_matrix = get_projection_matrix(near, far, fov_x, fov_y)
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    all_images = []
    all_radii = []
    all_maps = []

    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except Exception:
            pass

        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x[i].item(),
            tanfovy=tan_fov_y[i].item(),
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            # projmatrix_raw=projection_matrix[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

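        # The rasterizer consumes each covariance as its six upper-triangular
        # entries rather than the full symmetric 3x3 matrix.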
        row, col = torch.triu_indices(3, 3)

        image, radii, allmap = rasterizer(
            means3D=gaussian_means[i],
            means2D=mean_gradients,
            shs=shs[i] if use_sh else None,
            colors_precomp=None if use_sh else shs[i, :, 0, :],
            opacities=gaussian_opacities[i, ..., None],
            cov3D_precomp=gaussian_covariances[i, :, row, col],
            # theta=cam_rot_delta[i] if cam_rot_delta is not None else None,
            # rho=cam_trans_delta[i] if cam_trans_delta is not None else None,
        )
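        # allmap stacks the rasterizer's auxiliary per-pixel outputs; in 2DGS these
        # include alpha, depth, and normal channels (see the rasterizer for the
        # exact layout).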
        all_images.append(image)
        all_radii.append(radii)
        all_maps.append(allmap.squeeze(0))
    return torch.stack(all_images), torch.stack(all_maps)


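# A minimal usage sketch for render_cuda_2dgs, assuming normalized intrinsics and
# CUDA tensors; all shapes and values below are made-up placeholders, not defaults
# from any real pipeline:
#
#     b, g = 2, 1024
#     images, maps = render_cuda_2dgs(
#         extrinsics=torch.eye(4, device="cuda").expand(b, 4, 4),
#         intrinsics=torch.tensor(
#             [[1.0, 0.0, 0.5], [0.0, 1.0, 0.5], [0.0, 0.0, 1.0]], device="cuda"
#         ).expand(b, 3, 3),
#         near=torch.full((b,), 0.1, device="cuda"),
#         far=torch.full((b,), 100.0, device="cuda"),
#         image_shape=(256, 256),
#         background_color=torch.zeros(b, 3, device="cuda"),
#         gaussian_means=torch.randn(b, g, 3, device="cuda"),
#         gaussian_covariances=1e-4 * torch.eye(3, device="cuda").expand(b, g, 3, 3),
#         gaussian_sh_coefficients=torch.rand(b, g, 3, 1, device="cuda"),
#         gaussian_opacities=torch.rand(b, g, device="cuda"),
#         use_sh=False,
#     )

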
def render_cuda_orthographic(
    extrinsics: Float[Tensor, "batch 4 4"],
    width: Float[Tensor, " batch"],
    height: Float[Tensor, " batch"],
    near: Float[Tensor, " batch"],
    far: Float[Tensor, " batch"],
    image_shape: tuple[int, int],
    background_color: Float[Tensor, "batch 3"],
    gaussian_means: Float[Tensor, "batch gaussian 3"],
    gaussian_covariances: Float[Tensor, "batch gaussian 3 3"],
    gaussian_sh_coefficients: Float[Tensor, "batch gaussian 3 d_sh"],
    gaussian_opacities: Float[Tensor, "batch gaussian"],
    fov_degrees: float = 0.1,
    use_sh: bool = True,
    dump: dict | None = None,
) -> Float[Tensor, "batch 3 height width"]:
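    """Approximate an orthographic render by pushing the camera back and using a
    very narrow perspective frustum (fov_degrees wide).
    """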
    b, _, _ = extrinsics.shape
    h, w = image_shape
    assert use_sh or gaussian_sh_coefficients.shape[-1] == 1

    _, _, _, n = gaussian_sh_coefficients.shape
    degree = isqrt(n) - 1
    shs = rearrange(gaussian_sh_coefficients, "b g xyz n -> b g n xyz").contiguous()

    # Create a fake "orthographic" projection by moving the camera back and picking
    # a small field of view.
    fov_x = torch.tensor(fov_degrees, device=extrinsics.device).deg2rad()
    tan_fov_x = (0.5 * fov_x).tan()
    distance_to_near = (0.5 * width) / tan_fov_x
    tan_fov_y = 0.5 * height / distance_to_near
    fov_y = 2 * tan_fov_y.atan()
    near = near + distance_to_near
    far = far + distance_to_near
    move_back = repeat(
        torch.eye(4, dtype=torch.float32, device=extrinsics.device), "i j -> b i j", b=b
    ).clone()
    move_back[:, 2, 3] = -distance_to_near
    extrinsics = extrinsics @ move_back

    # Escape hatch for visualization/figures.
    if dump is not None:
        dump["extrinsics"] = extrinsics
        dump["fov_x"] = fov_x
        dump["fov_y"] = fov_y
        dump["near"] = near
        dump["far"] = far

    projection_matrix = get_projection_matrix(
        near, far, repeat(fov_x, "-> b", b=b), fov_y
    )
    projection_matrix = rearrange(projection_matrix, "b i j -> b j i")
    view_matrix = rearrange(extrinsics.inverse(), "b i j -> b j i")
    full_projection = view_matrix @ projection_matrix

    all_images = []
    all_radii = []
    for i in range(b):
        # Set up a tensor for the gradients of the screen-space means.
        mean_gradients = torch.zeros_like(gaussian_means[i], requires_grad=True)
        try:
            mean_gradients.retain_grad()
        except Exception:
            pass

        settings = GaussianRasterizationSettings(
            image_height=h,
            image_width=w,
            tanfovx=tan_fov_x.item(),
            tanfovy=tan_fov_y[i].item(),
            bg=background_color[i],
            scale_modifier=1.0,
            viewmatrix=view_matrix[i],
            projmatrix=full_projection[i],
            # projmatrix_raw=projection_matrix[i],
            sh_degree=degree,
            campos=extrinsics[i, :3, 3],
            prefiltered=False,  # This matches the original usage.
            debug=False,
        )
        rasterizer = GaussianRasterizer(settings)

        row, col = torch.triu_indices(3, 3)

        # The surfel rasterizer returns (image, radii, allmap); the auxiliary maps
        # are unused here.
        image, radii, _ = rasterizer(
            means3D=gaussian_means[i],
            means2D=mean_gradients,
            shs=shs[i] if use_sh else None,
            colors_precomp=None if use_sh else shs[i, :, 0, :],
            opacities=gaussian_opacities[i, ..., None],
            cov3D_precomp=gaussian_covariances[i, :, row, col],
        )
        all_images.append(image)
        all_radii.append(radii)
    return torch.stack(all_images)


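# Naming for the supported depth-visualization conventions.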
DepthRenderingMode = Literal["depth", "disparity", "relative_disparity", "log"]