first commit

2023-08-02 19:51:43 -07:00
parent c2891c38cc
commit 13e18567fa
202 changed files with 43362 additions and 17 deletions
--- a/nerf/provider.py
+++ b/nerf/provider.py
@@ -0,0 +1,329 @@
+import random
+import numpy as np
+from scipy.spatial.transform import Slerp, Rotation
+
+
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+
+from .utils import get_rays, safe_normalize
+
+# RGBA colors (uint8), one row per view-direction label. visualize_poses
+# indexes this table with the integer labels produced by get_view_direction.
+DIR_COLORS = np.array([
+    [255, 0, 0, 255], # 0: front
+    [0, 255, 0, 255], # 1: side (right)
+    [0, 0, 255, 255], # 2: back
+    [255, 255, 0, 255], # 3: side (left)
+    [255, 0, 255, 255], # 4: overhead (top)
+    [0, 255, 255, 255], # 5: bottom
+], dtype=np.uint8)
+
+def visualize_poses(poses, dirs, size=0.1):
+    """Open a trimesh viewer showing every pose as a small wireframe pyramid.
+
+    Args:
+        poses: [B, 4, 4] pose matrices; column 3 holds the camera position and
+            columns 0..2 the three camera axes.
+        dirs: [B] integer view-direction labels, used to pick a row of DIR_COLORS.
+        size: length scale of each drawn pyramid.
+    """
+    import trimesh
+
+    # reference geometry: world axes and a unit sphere
+    scene_items = [
+        trimesh.creation.axis(axis_length=4),
+        trimesh.creation.icosphere(radius=1),
+    ]
+
+    for cam, label in zip(poses, dirs):
+        apex = cam[:3, 3]
+        step_x = size * cam[:3, 0]
+        step_y = size * cam[:3, 1]
+        step_z = size * cam[:3, 2]
+
+        # four base corners, pushed along -z of the camera frame
+        corners = [
+            apex + step_x + step_y - step_z,
+            apex - step_x + step_y - step_z,
+            apex - step_x - step_y - step_z,
+            apex + step_x - step_y - step_z,
+        ]
+
+        # 8 line segments per camera: 4 from the apex, 4 around the base
+        spokes = [[apex, corner] for corner in corners]
+        rim = [[corners[k], corners[(k + 1) % 4]] for k in range(4)]
+        wireframe = trimesh.load_path(np.array(spokes + rim))
+
+        # one color per entity, chosen by the direction label
+        wireframe.colors = DIR_COLORS[[label]].repeat(len(wireframe.entities), 0)
+
+        scene_items.append(wireframe)
+
+    trimesh.Scene(scene_items).show()
+
+def get_view_direction(thetas, phis, overhead, front):
+    """Assign a coarse view-direction label to each (theta, phi) pair.
+
+    All angles are in radians; phis are wrapped into [0, 2*pi) first.
+
+    Labels, with h = front / 2:
+        0 front         phi in [0, h) or [2*pi - h, 2*pi)
+        1 side (right)  phi in [h, pi - h)
+        2 back          phi in [pi - h, pi + h)
+        3 side (left)   phi in [pi + h, 2*pi - h)
+        4 top           theta <= overhead              (overrides 0..3)
+        5 bottom        theta >= pi - overhead         (overrides 0..4)
+
+    Args:
+        thetas: [B] polar angles.
+        phis: [B] azimuth angles.
+        overhead: polar-angle threshold for the top / bottom labels.
+        front: angular width of the front and back sectors.
+    Return:
+        [B] torch.long tensor of labels, allocated by torch.zeros without an
+        explicit device.
+    """
+    half_front = front / 2
+    full_turn = 2 * np.pi
+    azimuth = phis % full_turn
+
+    labels = torch.zeros(thetas.shape[0], dtype=torch.long)
+
+    # azimuth sectors
+    in_front = (azimuth < half_front) | (azimuth >= full_turn - half_front)
+    in_right = (azimuth >= half_front) & (azimuth < np.pi - half_front)
+    in_back = (azimuth >= np.pi - half_front) & (azimuth < np.pi + half_front)
+    in_left = (azimuth >= np.pi + half_front) & (azimuth < full_turn - half_front)
+    for label, mask in enumerate((in_front, in_right, in_back, in_left)):
+        labels[mask] = label
+
+    # polar caps take precedence over the azimuth sectors
+    labels[thetas <= overhead] = 4
+    labels[thetas >= (np.pi - overhead)] = 5
+    return labels
+
+
+def rand_poses(size, device, opt, radius_range=(1, 1.5), theta_range=(0, 120), phi_range=(0, 360), return_dirs=False, angle_overhead=30, angle_front=60, uniform_sphere_rate=0.5):
+    ''' generate random poses from an orbit camera
+    Args:
+        size: batch size of generated poses.
+        device: where to allocate the output.
+        opt: options object; `jitter_pose` is always read, and when it is
+            truthy `jitter_center`, `jitter_target` and `jitter_up` are read too.
+        radius_range: [min, max] camera distance from the origin (before jitter).
+        theta_range: [min, max] polar angle in DEGREES (converted to radians here).
+        phi_range: [min, max] azimuth in DEGREES (converted to radians here).
+        return_dirs: if True, also compute view-direction labels.
+        angle_overhead: degrees, forwarded in radians to get_view_direction.
+        angle_front: degrees, forwarded in radians to get_view_direction.
+        uniform_sphere_rate: probability (per call, not per sample) of drawing
+            directions by normalizing random vectors instead of sampling
+            theta/phi from their ranges; that branch ignores theta_range and
+            phi_range and only produces directions with y >= 0.
+    Return:
+        poses: [size, 4, 4]
+        dirs: [size] long labels from get_view_direction, or None
+        thetas: [size] polar angles in degrees (not affected by jitter)
+        phis: [size] azimuth angles in degrees (not affected by jitter)
+        radius: [size] sampled radii
+    '''
+
+    # degrees -> radians
+    theta_range = np.array(theta_range) / 180 * np.pi
+    phi_range = np.array(phi_range) / 180 * np.pi
+    angle_overhead = angle_overhead / 180 * np.pi
+    angle_front = angle_front / 180 * np.pi
+
+    radius = torch.rand(size, device=device) * (radius_range[1] - radius_range[0]) + radius_range[0]
+
+    if random.random() < uniform_sphere_rate:
+        # x, z in [-1, 1), y in [0, 1), then projected onto the unit sphere
+        unit_centers = F.normalize(
+            torch.stack([
+                (torch.rand(size, device=device) - 0.5) * 2.0,
+                torch.rand(size, device=device),
+                (torch.rand(size, device=device) - 0.5) * 2.0,
+            ], dim=-1), p=2, dim=1
+        )
+        # recover the angles so they can be returned / labelled
+        thetas = torch.acos(unit_centers[:,1])
+        phis = torch.atan2(unit_centers[:,0], unit_centers[:,2])
+        phis[phis < 0] += 2 * np.pi
+        centers = unit_centers * radius.unsqueeze(-1)
+    else:
+        thetas = torch.rand(size, device=device) * (theta_range[1] - theta_range[0]) + theta_range[0]
+        phis = torch.rand(size, device=device) * (phi_range[1] - phi_range[0]) + phi_range[0]
+        phis[phis < 0] += 2 * np.pi
+
+        # spherical -> cartesian, y is the polar axis
+        centers = torch.stack([
+            radius * torch.sin(thetas) * torch.sin(phis),
+            radius * torch.cos(thetas),
+            radius * torch.sin(thetas) * torch.cos(phis),
+        ], dim=-1) # [B, 3]
+
+    # cameras look at the origin unless jittered below
+    targets = 0
+
+    # jitters
+    if opt.jitter_pose:
+        jit_center = opt.jitter_center # 0.015  # was 0.2
+        jit_target = opt.jitter_target
+        # uniform offset in [-jit_center/2, jit_center/2) per coordinate
+        centers += torch.rand_like(centers) * jit_center - jit_center/2.0
+        # gaussian offset of the look-at point
+        targets += torch.randn_like(centers) * jit_target
+
+    # lookat
+    forward_vector = safe_normalize(centers - targets)
+    up_vector = torch.FloatTensor([0, 1, 0]).to(device).unsqueeze(0).repeat(size, 1)
+    right_vector = safe_normalize(torch.cross(forward_vector, up_vector, dim=-1))
+
+    if opt.jitter_pose:
+        up_noise = torch.randn_like(up_vector) * opt.jitter_up
+    else:
+        up_noise = 0
+
+    # re-derive up from right x forward, optionally perturbed
+    up_vector = safe_normalize(torch.cross(right_vector, forward_vector, dim=-1) + up_noise)
+
+    # columns of the rotation are (right, up, forward); translation is the camera center
+    poses = torch.eye(4, dtype=torch.float, device=device).unsqueeze(0).repeat(size, 1, 1)
+    poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
+    poses[:, :3, 3] = centers
+
+    if return_dirs:
+        dirs = get_view_direction(thetas, phis, angle_overhead, angle_front)
+    else:
+        dirs = None
+
+    # back to degree
+    thetas = thetas / np.pi * 180
+    phis = phis / np.pi * 180
+
+    return poses, dirs, thetas, phis, radius
+
+
+def circle_poses(device, radius=torch.tensor([3.2]), theta=torch.tensor([60]), phi=torch.tensor([0]), return_dirs=False, angle_overhead=30, angle_front=60):
+    ''' generate deterministic orbit-camera poses looking at the origin
+    Args:
+        device: where to allocate the output.
+        radius: camera distance(s) from the origin; tensor, sequence or scalar.
+        theta: polar angle(s) in degrees.
+        phi: azimuth angle(s) in degrees.
+        return_dirs: if True, also compute view-direction labels.
+        angle_overhead: degrees, forwarded in radians to get_view_direction.
+        angle_front: degrees, forwarded in radians to get_view_direction.
+    Return:
+        poses: [B, 4, 4]
+        dirs: [B] long labels from get_view_direction, or None
+    '''
+
+    # The default arguments are CPU tensors created at import time; move every
+    # input onto `device` (as 1-D float tensors) so they can be combined with
+    # the up vector allocated on `device` below.
+    radius = torch.as_tensor(radius, dtype=torch.float, device=device).reshape(-1)
+    theta = torch.as_tensor(theta, dtype=torch.float, device=device).reshape(-1)
+    phi = torch.as_tensor(phi, dtype=torch.float, device=device).reshape(-1)
+
+    # degrees -> radians
+    theta = theta / 180 * np.pi
+    phi = phi / 180 * np.pi
+    angle_overhead = angle_overhead / 180 * np.pi
+    angle_front = angle_front / 180 * np.pi
+
+    # spherical -> cartesian, y is the polar axis
+    centers = torch.stack([
+        radius * torch.sin(theta) * torch.sin(phi),
+        radius * torch.cos(theta),
+        radius * torch.sin(theta) * torch.cos(phi),
+    ], dim=-1) # [B, 3]
+
+    # lookat
+    forward_vector = safe_normalize(centers)
+    up_vector = torch.FloatTensor([0, 1, 0]).to(device).unsqueeze(0).repeat(len(centers), 1)
+    right_vector = safe_normalize(torch.cross(forward_vector, up_vector, dim=-1))
+    up_vector = safe_normalize(torch.cross(right_vector, forward_vector, dim=-1))
+
+    # columns of the rotation are (right, up, forward); translation is the camera center
+    poses = torch.eye(4, dtype=torch.float, device=device).unsqueeze(0).repeat(len(centers), 1, 1)
+    poses[:, :3, :3] = torch.stack((right_vector, up_vector, forward_vector), dim=-1)
+    poses[:, :3, 3] = centers
+
+    if return_dirs:
+        dirs = get_view_direction(theta, phi, angle_overhead, angle_front)
+    else:
+        dirs = None
+
+    return poses, dirs
+
+
+class NeRFDataset:
+    """Camera pose / ray provider that synthesizes its samples on the fly.
+
+    `type` selects how `collate` picks poses: 'train' and 'all' draw random
+    poses via rand_poses, 'six_views' uses six fixed views, and any other
+    value walks a circle of `size` azimuth steps at the default polar angle.
+    """
+    def __init__(self, opt, device, type='train', H=256, W=256, size=100):
+        super().__init__()
+
+        self.opt = opt
+        self.device = device
+        self.type = type # train, val, test
+
+        self.H = H
+        self.W = W
+        self.size = size
+
+        self.training = self.type in ['train', 'all']
+
+        # principal point at the image center: cx along the width (x),
+        # cy along the height (y), matching the [fx, fy, cx, cy] intrinsics
+        self.cx = self.W / 2
+        self.cy = self.H / 2
+
+        self.near = self.opt.min_near
+        self.far = 1000 # infinite
+
+        # [debug] visualize poses
+        # poses, dirs, _, _, _ = rand_poses(100, self.device, opt, radius_range=self.opt.radius_range, return_dirs=True, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front, uniform_sphere_rate=1)
+        # visualize_poses(poses.detach().cpu().numpy(), dirs.detach().cpu().numpy())
+
+    def _projection(self, focal, H, W):
+        # Perspective projection matrix built from the focal length, the image
+        # size and the near/far planes; shape [1, 4, 4] on self.device.
+        return torch.tensor([
+            [2*focal/W, 0, 0, 0],
+            [0, -2*focal/H, 0, 0],
+            [0, 0, -(self.far+self.near)/(self.far-self.near), -(2*self.far*self.near)/(self.far-self.near)],
+            [0, 0, -1, 0]
+        ], dtype=torch.float32, device=self.device).unsqueeze(0)
+
+    def get_default_view_data(self):
+        """Build the ray batch for the reference views listed in `opt`.
+
+        Uses opt.ref_radii / ref_polars / ref_azimuths (one entry per view) at
+        a resolution scaled by opt.known_view_scale and a fixed
+        opt.default_fovy. 'polar', 'azimuth' and 'radius' in the result are
+        the raw opt values, not deltas.
+        """
+
+        H = int(self.opt.known_view_scale * self.H)
+        W = int(self.opt.known_view_scale * self.W)
+        # cx along the width, cy along the height
+        cx = W / 2
+        cy = H / 2
+
+        radii = torch.FloatTensor(self.opt.ref_radii).to(self.device)
+        thetas = torch.FloatTensor(self.opt.ref_polars).to(self.device)
+        phis = torch.FloatTensor(self.opt.ref_azimuths).to(self.device)
+        poses, dirs = circle_poses(self.device, radius=radii, theta=thetas, phi=phis, return_dirs=True, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)
+        fov = self.opt.default_fovy
+        # focal length in pixels from the vertical field of view
+        focal = H / (2 * np.tan(np.deg2rad(fov) / 2))
+        intrinsics = np.array([focal, focal, cx, cy])
+
+        projection = self._projection(focal, H, W).repeat(len(radii), 1, 1)
+
+        mvp = projection @ torch.inverse(poses) # [B, 4, 4]
+
+        # sample a low-resolution but full image
+        rays = get_rays(poses, intrinsics, H, W, -1)
+
+        data = {
+            'H': H,
+            'W': W,
+            'rays_o': rays['rays_o'],
+            'rays_d': rays['rays_d'],
+            'dir': dirs,
+            'mvp': mvp,
+            'polar': self.opt.ref_polars,
+            'azimuth': self.opt.ref_azimuths,
+            'radius': self.opt.ref_radii,
+        }
+
+        return data
+
+    def collate(self, index):
+        """Turn a batch of sample indices into a dict of rays and camera data.
+
+        Args:
+            index: list of indices from the DataLoader. In training mode only
+                its length is used (batch size); otherwise only index[0] is.
+        Return:
+            dict with 'H', 'W', 'rays_o', 'rays_d', 'dir', 'mvp', and
+            'polar' / 'azimuth' / 'radius' expressed as offsets from
+            opt.default_polar / default_azimuth / default_radius.
+        """
+
+        B = len(index)
+
+        if self.training:
+            # random pose on the fly
+            poses, dirs, thetas, phis, radius = rand_poses(B, self.device, self.opt, radius_range=self.opt.radius_range, theta_range=self.opt.theta_range, phi_range=self.opt.phi_range, return_dirs=True, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front, uniform_sphere_rate=self.opt.uniform_sphere_rate)
+
+            # random focal
+            fov = random.random() * (self.opt.fovy_range[1] - self.opt.fovy_range[0]) + self.opt.fovy_range[0]
+
+        elif self.type == 'six_views':
+            # six views
+            thetas_six = [90]*4 + [1e-6] + [180]
+            phis_six = [0, 90, 180, -90, 0, 0]
+            thetas = torch.FloatTensor([thetas_six[index[0]]]).to(self.device)
+            phis = torch.FloatTensor([phis_six[index[0]]]).to(self.device)
+            radius = torch.FloatTensor([self.opt.default_radius]).to(self.device)
+            poses, dirs = circle_poses(self.device, radius=radius, theta=thetas, phi=phis, return_dirs=True, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)
+
+            # fixed focal
+            fov = self.opt.default_fovy
+
+        else:
+            # circle pose
+            thetas = torch.FloatTensor([self.opt.default_polar]).to(self.device)
+            phis = torch.FloatTensor([(index[0] / self.size) * 360]).to(self.device)
+            radius = torch.FloatTensor([self.opt.default_radius]).to(self.device)
+            poses, dirs = circle_poses(self.device, radius=radius, theta=thetas, phi=phis, return_dirs=True, angle_overhead=self.opt.angle_overhead, angle_front=self.opt.angle_front)
+
+            # fixed focal
+            fov = self.opt.default_fovy
+
+        # focal length in pixels from the vertical field of view
+        focal = self.H / (2 * np.tan(np.deg2rad(fov) / 2))
+        intrinsics = np.array([focal, focal, self.cx, self.cy])
+
+        # [1, 4, 4]; broadcasts against the [B, 4, 4] poses below
+        projection = self._projection(focal, self.H, self.W)
+
+        mvp = projection @ torch.inverse(poses) # [B, 4, 4]
+
+        # sample a low-resolution but full image
+        rays = get_rays(poses, intrinsics, self.H, self.W, -1)
+
+        # delta polar/azimuth/radius to default view
+        delta_polar = thetas - self.opt.default_polar
+        delta_azimuth = phis - self.opt.default_azimuth
+        # wrap into [-180, 180] from both sides
+        delta_azimuth[delta_azimuth > 180] -= 360
+        delta_azimuth[delta_azimuth < -180] += 360
+        delta_radius = radius - self.opt.default_radius
+
+        data = {
+            'H': self.H,
+            'W': self.W,
+            'rays_o': rays['rays_o'],
+            'rays_d': rays['rays_d'],
+            'dir': dirs,
+            'mvp': mvp,
+            'polar': delta_polar,
+            'azimuth': delta_azimuth,
+            'radius': delta_radius,
+        }
+
+        return data
+
+    def dataloader(self, batch_size=None):
+        """Wrap range(size) in a DataLoader that uses `collate`.
+
+        batch_size falls back to opt.batch_size when None (or 0); shuffling is
+        enabled only in training mode. The dataset is attached as `_data`.
+        """
+        batch_size = batch_size or self.opt.batch_size
+        loader = DataLoader(list(range(self.size)), batch_size=batch_size, collate_fn=self.collate, shuffle=self.training, num_workers=0)
+        loader._data = self
+        return loader
+
+
+def generate_grid_points(resolution=128, device='cuda'):
+    """Return the points of a regular grid over the unit cube [0, 1]^3.
+
+    Args:
+        resolution: number of points along each dimension.
+        device: device the result is moved to.
+    Return:
+        [resolution**3, 3] float tensor of (x, y, z) coordinates; z varies
+        fastest, x slowest.
+    """
+    # the three axes share the same sample positions
+    axis = torch.linspace(0, 1, resolution)
+    # 'ij' is the ordering the bare meshgrid call used implicitly; naming it
+    # keeps that ordering and avoids the deprecation warning about the
+    # missing `indexing` argument.
+    grid_x, grid_y, grid_z = torch.meshgrid(axis, axis, axis, indexing='ij')
+
+    # flatten each coordinate grid and pair them up as rows
+    grid_points = torch.stack((grid_x.flatten(), grid_y.flatten(), grid_z.flatten()), dim=1).to(device)
+    return grid_points
+