first commit

This commit is contained in:
Guocheng Qian
2023-08-02 19:51:43 -07:00
parent c2891c38cc
commit 13e18567fa
202 changed files with 43362 additions and 17 deletions

158
render/light.py Normal file
View File

@@ -0,0 +1,158 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import numpy as np
import torch
import nvdiffrast.torch as dr
from . import util
from . import renderutils as ru
######################################################################################
# Utility functions
######################################################################################
class cubemap_mip(torch.autograd.Function):
    """Differentiable 2x downsample of a [6, res, res, C] cubemap.

    Forward is a plain 2x2 average pool per face. Backward redistributes the
    incoming (coarse) gradient to the finer level by sampling it with
    cube-mapped texture lookups, so gradients stay consistent across face seams.
    """
    @staticmethod
    def forward(ctx, cubemap):
        # Average-pool each face from (res, res) down to (res/2, res/2).
        return util.avg_pool_nhwc(cubemap, (2,2))

    @staticmethod
    def backward(ctx, dout):
        # The gradient is upsampled back to the finer resolution.
        res = dout.shape[1] * 2
        out = torch.zeros(6, res, res, dout.shape[-1], dtype=torch.float32, device="cuda")
        for s in range(6):
            # Pixel-center sample grid in [-1, 1] for cube face s.
            # NOTE(review): relies on torch.meshgrid's legacy 'ij' default;
            # newer torch versions warn unless indexing='ij' is passed.
            gy, gx = torch.meshgrid(torch.linspace(-1.0 + 1.0 / res, 1.0 - 1.0 / res, res, device="cuda"),
                                    torch.linspace(-1.0 + 1.0 / res, 1.0 - 1.0 / res, res, device="cuda"),
                                    ) # indexing='ij')
            v = util.safe_normalize(util.cube_to_dir(s, gx, gy))
            # The 0.25 factor compensates for the 2x2 averaging in forward.
            out[s, ...] = dr.texture(dout[None, ...] * 0.25, v[None, ...].contiguous(), filter_mode='linear', boundary_mode='cube')
        return out
######################################################################################
# Split-sum environment map light source with automatic mipmap generation
######################################################################################
class EnvironmentLight(torch.nn.Module):
    """Split-sum image-based environment light.

    Holds a trainable [6, res, res, 3] base cubemap. `build_mips` derives a
    roughness-prefiltered specular mip chain and a diffuse-convolved cubemap;
    `shade` evaluates the split-sum approximation against a G-buffer.
    """
    # Coarsest resolution kept in the specular mip chain.
    LIGHT_MIN_RES = 16

    # Roughness range spanned by the interpolated mip levels; values above
    # MAX_ROUGHNESS map onto the last, fully-rough level (see get_mip).
    MIN_ROUGHNESS = 0.08
    MAX_ROUGHNESS = 0.5

    def __init__(self, base):
        super(EnvironmentLight, self).__init__()
        self.mtx = None  # optional transform applied to lookup directions in shade()
        self.base = torch.nn.Parameter(base.clone().detach(), requires_grad=True)
        self.register_parameter('env_base', self.base)

    def xfm(self, mtx):
        # Store the matrix used to rotate reflection/normal lookups in shade().
        self.mtx = mtx

    def clone(self):
        # NOTE(review): the clone does not carry over self.mtx — confirm intended.
        return EnvironmentLight(self.base.clone().detach())

    def clamp_(self, min=None, max=None):
        # In-place clamp of the base cubemap (e.g. keep radiance non-negative).
        self.base.clamp_(min, max)

    def get_mip(self, roughness):
        """Map per-pixel roughness to a fractional specular mip level.

        Roughness in [MIN, MAX] spans levels [0, len(specular)-2]; roughness in
        (MAX, 1] interpolates toward the final fully-rough level.
        """
        return torch.where(roughness < self.MAX_ROUGHNESS
                        , (torch.clamp(roughness, self.MIN_ROUGHNESS, self.MAX_ROUGHNESS) - self.MIN_ROUGHNESS) / (self.MAX_ROUGHNESS - self.MIN_ROUGHNESS) * (len(self.specular) - 2)
                        , (torch.clamp(roughness, self.MAX_ROUGHNESS, 1.0) - self.MAX_ROUGHNESS) / (1.0 - self.MAX_ROUGHNESS) + len(self.specular) - 2)

    def build_mips(self, cutoff=0.99):
        """(Re)build the specular mip chain and diffuse cubemap from self.base.

        Must be called after the base changes and before shade() is used.
        """
        # Downsample base until LIGHT_MIN_RES, keeping every level.
        self.specular = [self.base]
        while self.specular[-1].shape[1] > self.LIGHT_MIN_RES:
            self.specular += [cubemap_mip.apply(self.specular[-1])]

        # Diffuse irradiance is convolved from the coarsest level.
        self.diffuse = ru.diffuse_cubemap(self.specular[-1])

        # Prefilter each level with increasing roughness; last level fully rough.
        for idx in range(len(self.specular) - 1):
            roughness = (idx / (len(self.specular) - 2)) * (self.MAX_ROUGHNESS - self.MIN_ROUGHNESS) + self.MIN_ROUGHNESS
            self.specular[idx] = ru.specular_cubemap(self.specular[idx], roughness, cutoff)
        self.specular[-1] = ru.specular_cubemap(self.specular[-1], 1.0, cutoff)

    def regularizer(self):
        # Penalize chroma: mean distance of each texel from its per-texel gray value.
        white = (self.base[..., 0:1] + self.base[..., 1:2] + self.base[..., 2:3]) / 3.0
        return torch.mean(torch.abs(self.base - white))

    def shade(self, gb_pos, gb_normal, kd, ks, view_pos, specular=True):
        """Shade G-buffer pixels using the split-sum approximation.

        Args:
            gb_pos:    world-space positions, shape [n, h, w, 3] (inferred from the view() calls below — confirm).
            gb_normal: world-space shading normals, same shape.
            kd:        diffuse albedo.
            ks:        packed material; x is used as occlusion, y as roughness, z as metallic.
            view_pos:  camera position(s), broadcastable against gb_pos.
            specular:  when False, only the diffuse term is evaluated.

        Returns:
            Shaded color, modulated by (1 - ks.x) hemisphere visibility.
        """
        wo = util.safe_normalize(view_pos - gb_pos)

        if specular:
            roughness = ks[..., 1:2] # y component
            metallic = ks[..., 2:3] # z component
            # 0.04 is the dielectric base reflectance; metals reflect with albedo.
            spec_col = (1.0 - metallic)*0.04 + kd * metallic
            diff_col = kd * (1.0 - metallic)
        else:
            diff_col = kd

        reflvec = util.safe_normalize(util.reflect(wo, gb_normal))
        nrmvec = gb_normal
        if self.mtx is not None: # Rotate lookup
            mtx = torch.as_tensor(self.mtx, dtype=torch.float32, device='cuda')
            reflvec = ru.xfm_vectors(reflvec.view(reflvec.shape[0], reflvec.shape[1] * reflvec.shape[2], reflvec.shape[3]), mtx).view(*reflvec.shape)
            nrmvec = ru.xfm_vectors(nrmvec.view(nrmvec.shape[0], nrmvec.shape[1] * nrmvec.shape[2], nrmvec.shape[3]), mtx).view(*nrmvec.shape)

        # Diffuse lookup
        diffuse = dr.texture(self.diffuse[None, ...], nrmvec.contiguous(), filter_mode='linear', boundary_mode='cube')
        shaded_col = diffuse * diff_col

        if specular:
            # Lookup FG term from lookup texture
            NdotV = torch.clamp(util.dot(wo, gb_normal), min=1e-4)
            fg_uv = torch.cat((NdotV, roughness), dim=-1)
            if not hasattr(self, '_FG_LUT'):
                # Pre-integrated BRDF scale/bias table, lazily loaded from disk
                # (relative path — assumes cwd is the project root; confirm).
                self._FG_LUT = torch.as_tensor(np.fromfile('data/irrmaps/bsdf_256_256.bin', dtype=np.float32).reshape(1, 256, 256, 2), dtype=torch.float32, device='cuda')
            fg_lookup = dr.texture(self._FG_LUT, fg_uv, filter_mode='linear', boundary_mode='clamp')

            # Roughness adjusted specular env lookup
            miplevel = self.get_mip(roughness)
            spec = dr.texture(self.specular[0][None, ...], reflvec.contiguous(), mip=list(m[None, ...] for m in self.specular[1:]), mip_level_bias=miplevel[..., 0], filter_mode='linear-mipmap-linear', boundary_mode='cube')

            # Compute aggregate lighting
            reflectance = spec_col * fg_lookup[...,0:1] + fg_lookup[...,1:2]
            shaded_col += spec * reflectance

        return shaded_col * (1.0 - ks[..., 0:1]) # Modulate by hemisphere visibility
######################################################################################
# Load and store
######################################################################################
# Load from latlong .HDR file
def _load_env_hdr(fn, scale=1.0):
    """Load a latlong .hdr image and turn it into an EnvironmentLight with built mips."""
    hdr = util.load_image(fn)
    latlong_img = torch.tensor(hdr, dtype=torch.float32, device='cuda') * scale
    cubemap = util.latlong_to_cubemap(latlong_img, [512, 512])
    light = EnvironmentLight(cubemap)
    light.build_mips()
    return light
def load_env(fn, scale=1.0):
    """Load an environment light from file (currently only latlong .hdr).

    Args:
        fn: Path to the environment map.
        scale: Uniform multiplier applied to the loaded radiance.

    Returns:
        An EnvironmentLight with its mip chain already built.

    Raises:
        ValueError: If the file extension is not a supported format.
    """
    if os.path.splitext(fn)[1].lower() == ".hdr":
        return _load_env_hdr(fn, scale)
    # Raise instead of `assert False`: asserts are stripped under `python -O`,
    # which would make this silently return None for unsupported formats.
    raise ValueError("Unknown envlight extension %s" % os.path.splitext(fn)[1])
def save_env_map(fn, light):
    """Save an EnvironmentLight's base cubemap as a latlong image.

    Args:
        fn: Output image path.
        light: EnvironmentLight whose base cubemap is exported.

    Raises:
        TypeError: If `light` is not an EnvironmentLight.
    """
    # Replace `assert` (stripped under -O) plus the redundant second
    # isinstance() re-check with a single explicit type check.
    if not isinstance(light, EnvironmentLight):
        raise TypeError("Can only save EnvironmentLight currently")
    color = util.cubemap_to_latlong(light.base, [512, 1024])
    util.save_image_raw(fn, color.detach().cpu().numpy())
######################################################################################
# Create trainable env map with random initialization
######################################################################################
def create_trainable_env_rnd(base_res, scale=0.5, bias=0.25):
    """Create an EnvironmentLight whose cubemap is uniform random in [bias, bias + scale)."""
    noise = torch.rand(6, base_res, base_res, 3, dtype=torch.float32, device='cuda')
    return EnvironmentLight(noise * scale + bias)

182
render/material.py Normal file
View File

@@ -0,0 +1,182 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import numpy as np
import torch
from . import util
from . import texture
######################################################################################
# Wrapper to make materials behave like a python dict, but register textures as
# torch.nn.Module parameters.
######################################################################################
class Material(torch.nn.Module):
    """Dict-like material container.

    Behaves like a python dict (indexing, `in`, `keys`) while storing values as
    attributes, so torch.nn.Module values (e.g. textures) are registered as
    submodules and their parameters are picked up by optimizers.
    """
    def __init__(self, mat_dict):
        super(Material, self).__init__()
        self.mat_keys = set()
        for key in mat_dict.keys():
            self.mat_keys.add(key)
            self[key] = mat_dict[key]

    def __contains__(self, key):
        # Fix: test against the tracked material keys. The previous
        # hasattr(self, key) also matched unrelated nn.Module attributes,
        # so e.g. `'training' in mat` was spuriously True.
        return key in self.mat_keys

    def __getitem__(self, key):
        return getattr(self, key)

    def __setitem__(self, key, val):
        self.mat_keys.add(key)
        setattr(self, key, val)

    def __delitem__(self, key):
        self.mat_keys.remove(key)  # raises KeyError for unknown keys
        delattr(self, key)

    def keys(self):
        # Note: returns the live set, not a copy.
        return self.mat_keys
######################################################################################
# .mtl material format loading / storing
######################################################################################
@torch.no_grad()
def load_mtl(fn, clear_ks=True):
    """Parse a .mtl material library into a list of Material objects.

    Scalar attributes become cuda float tensors; 'kd'/'ks' are always converted
    to Texture2D maps (1x1 maps for constants), 'bump' becomes a 'normal'
    texture remapped from [0,1] to [-1,1], and Kd is converted sRGB -> linear.

    Args:
        fn: Path to the .mtl file; referenced texture paths are resolved
            relative to its directory.
        clear_ks: If True, zero the occlusion (red) channel of every ks mip.

    Returns:
        List of Material, in file order.
    """
    import re
    mtl_path = os.path.dirname(fn)

    # Read file
    with open(fn, 'r') as f:
        lines = f.readlines()

    # Parse materials
    materials = []
    for line in lines:
        # Split on runs of spaces/tabs/newlines.
        split_line = re.split(' +|\t+|\n+', line.strip())
        prefix = split_line[0].lower()
        data = split_line[1:]
        if 'newmtl' in prefix:
            material = Material({'name' : data[0]})
            materials += [material]
        elif materials:  # attributes before the first newmtl are ignored
            # String-valued attributes (bsdf name, texture file paths).
            if 'bsdf' in prefix or 'map_kd' in prefix or 'map_ks' in prefix or 'bump' in prefix:
                material[prefix] = data[0]
            else:
                # Everything else is parsed as a numeric tuple.
                material[prefix] = torch.tensor(tuple(float(d) for d in data), dtype=torch.float32, device='cuda')

    # Convert everything to textures. Our code expects 'kd' and 'ks' to be texture maps. So replace constants with 1x1 maps
    for mat in materials:
        if not 'bsdf' in mat:
            mat['bsdf'] = 'pbr'  # default BSDF when the file specifies none

        if 'map_kd' in mat:
            mat['kd'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_kd']))
        else:
            mat['kd'] = texture.Texture2D(mat['kd'])

        if 'map_ks' in mat:
            mat['ks'] = texture.load_texture2D(os.path.join(mtl_path, mat['map_ks']), channels=3)
        else:
            mat['ks'] = texture.Texture2D(mat['ks'])

        if 'bump' in mat:
            # Remap normal map values from [0, 1] to [-1, 1].
            mat['normal'] = texture.load_texture2D(os.path.join(mtl_path, mat['bump']), lambda_fn=lambda x: x * 2 - 1, channels=3)

        # Convert Kd from sRGB to linear RGB
        mat['kd'] = texture.srgb_to_rgb(mat['kd'])

        if clear_ks:
            # Override ORM occlusion (red) channel by zeros. We hijack this channel
            for mip in mat['ks'].getMips():
                mip[..., 0] = 0.0

    return materials
@torch.no_grad()
def save_mtl(fn, material):
    """Write `material` as a single 'defaultMat' entry in a .mtl file.

    Texture maps (kd converted linear -> sRGB, ks as-is, normal remapped to
    [0, 1]) are saved as PNGs next to the .mtl file. When material is None a
    plain constant white/black material is written instead.
    """
    out_dir = os.path.dirname(fn)
    with open(fn, "w") as mtl:
        mtl.write('newmtl defaultMat\n')
        if material is None:
            # Fallback constant material.
            for const_line in ('Kd 1 1 1\n', 'Ks 0 0 0\n', 'Ka 0 0 0\n', 'Tf 1 1 1\n', 'Ni 1\n', 'Ns 0\n'):
                mtl.write(const_line)
        else:
            mtl.write('bsdf %s\n' % material['bsdf'])
            if 'kd' in material.keys():
                mtl.write('map_kd texture_kd.png\n')
                texture.save_texture2D(os.path.join(out_dir, 'texture_kd.png'), texture.rgb_to_srgb(material['kd']))
            if 'ks' in material.keys():
                mtl.write('map_ks texture_ks.png\n')
                texture.save_texture2D(os.path.join(out_dir, 'texture_ks.png'), material['ks'])
            if 'normal' in material.keys():
                mtl.write('bump texture_n.png\n')
                # Remap unit normals from [-1, 1] back into [0, 1] for storage.
                texture.save_texture2D(os.path.join(out_dir, 'texture_n.png'), material['normal'], lambda_fn=lambda x: (util.safe_normalize(x) + 1) * 0.5)
######################################################################################
# Merge multiple materials into a single uber-material
######################################################################################
def _upscale_replicate(x, full_res):
x = x.permute(0, 3, 1, 2)
x = torch.nn.functional.pad(x, (0, full_res[1] - x.shape[3], 0, full_res[0] - x.shape[2]), 'replicate')
return x.permute(0, 2, 3, 1).contiguous()
def merge_materials(materials, texcoords, tfaces, mfaces):
    """Merge multiple materials into a single uber-material.

    All textures are rescaled to the maximum per-texture resolution, laid out
    horizontally in one composite texture (padded to power-of-two), and the
    texture coordinates / texture faces are reindexed accordingly.

    Args:
        materials: Non-empty list of Material; all must share the same bsdf
            and agree on whether a normal map is present.
        texcoords: Per-vertex (u, v) texture coordinates.
        tfaces: Per-face texcoord index triplets; reindexed in place.
        mfaces: Per-face material indices into `materials`.

    Returns:
        (uber_material, new_tverts_data, tfaces) — the merged material, the new
        texcoord list, and the (mutated) texture face indices.
    """
    assert len(materials) > 0
    for mat in materials:
        assert mat['bsdf'] == materials[0]['bsdf'], "All materials must have the same BSDF (uber shader)"
        assert ('normal' in mat) is ('normal' in materials[0]), "All materials must have either normal map enabled or disabled"

    uber_material = Material({
        'name' : 'uber_material',
        'bsdf' : materials[0]['bsdf'],
    })

    textures = ['kd', 'ks', 'normal']

    # Find maximum texture resolution across all materials and textures
    max_res = None
    for mat in materials:
        for tex in textures:
            tex_res = np.array(mat[tex].getRes()) if tex in mat else np.array([1, 1])
            max_res = np.maximum(max_res, tex_res) if max_res is not None else tex_res

    # Compute size of compound texture and round up to nearest PoT.
    # Fix: `np.int` was deprecated in NumPy 1.20 and removed in 1.24; use the
    # builtin `int` (the alias's original meaning) instead.
    full_res = 2**np.ceil(np.log2(max_res * np.array([1, len(materials)]))).astype(int)

    # Normalize texture resolution across all materials & combine into a single large texture
    for tex in textures:
        if tex in materials[0]:
            tex_data = torch.cat(tuple(util.scale_img_nhwc(mat[tex].data, tuple(max_res)) for mat in materials), dim=2) # Lay out all textures horizontally, NHWC so dim2 is x
            tex_data = _upscale_replicate(tex_data, full_res)
            uber_material[tex] = texture.Texture2D(tex_data)

    # Compute scaling values for used / unused texture area
    s_coeff = [full_res[0] / max_res[0], full_res[1] / max_res[1]]

    # Recompute texture coordinates to coincide with new composite texture
    new_tverts = {}
    new_tverts_data = []
    for fi in range(len(tfaces)):
        matIdx = mfaces[fi]
        for vi in range(3):
            ti = tfaces[fi][vi]
            if not (ti in new_tverts):
                new_tverts[ti] = {}
            if not (matIdx in new_tverts[ti]): # create new vertex
                new_tverts_data.append([(matIdx + texcoords[ti][0]) / s_coeff[1], texcoords[ti][1] / s_coeff[0]]) # Offset texture coordinate (x direction) by material id & scale to local space. Note, texcoords are (u,v) but texture is stored (w,h) so the indexes swap here
                new_tverts[ti][matIdx] = len(new_tverts_data) - 1
            tfaces[fi][vi] = new_tverts[ti][matIdx] # reindex vertex
    return uber_material, new_tverts_data, tfaces

241
render/mesh.py Normal file
View File

@@ -0,0 +1,241 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import numpy as np
import torch
from . import obj
from . import util
######################################################################################
# Base mesh class
######################################################################################
class Mesh:
    """Simple indexed triangle mesh container.

    Holds vertex attributes (position, normal, texcoord, tangent), the matching
    per-face index tensors, and a material. Attributes left as None can be
    inherited from another mesh via the `base` argument.
    """

    def __init__(self, v_pos=None, t_pos_idx=None, v_nrm=None, t_nrm_idx=None, v_tex=None, t_tex_idx=None, v_tng=None, t_tng_idx=None, material=None, base=None):
        self.v_pos = v_pos
        self.v_nrm = v_nrm
        self.v_tex = v_tex
        self.v_tng = v_tng
        self.t_pos_idx = t_pos_idx
        self.t_nrm_idx = t_nrm_idx
        self.t_tex_idx = t_tex_idx
        self.t_tng_idx = t_tng_idx
        self.material = material
        if base is not None:
            self.copy_none(base)

    def copy_none(self, other):
        """Fill any attribute that is still None with the value from `other` (shared, not copied)."""
        for attr in ('v_pos', 't_pos_idx', 'v_nrm', 't_nrm_idx',
                     'v_tex', 't_tex_idx', 'v_tng', 't_tng_idx', 'material'):
            if getattr(self, attr) is None:
                setattr(self, attr, getattr(other, attr))

    def clone(self):
        """Return a copy with all tensors cloned and detached; material is shared."""
        out = Mesh(base=self)
        for attr in ('v_pos', 't_pos_idx', 'v_nrm', 't_nrm_idx',
                     'v_tex', 't_tex_idx', 'v_tng', 't_tng_idx'):
            tensor = getattr(out, attr)
            if tensor is not None:
                setattr(out, attr, tensor.clone().detach())
        return out
######################################################################################
# Mesh loading helper
######################################################################################
def load_mesh(filename, mtl_override=None):
    """Load a mesh from file; currently only Wavefront .obj is supported.

    Args:
        filename: Path to the mesh file.
        mtl_override: Optional .mtl path overriding the file's material library.

    Returns:
        A Mesh (see obj.load_obj).

    Raises:
        ValueError: If the file extension is not supported.
    """
    ext = os.path.splitext(filename)[1]
    if ext == ".obj":
        return obj.load_obj(filename, clear_ks=True, mtl_override=mtl_override)
    # Raise instead of `assert False`: asserts are stripped under `python -O`,
    # which would silently return None. Also drops the unused `name` unpack.
    raise ValueError("Invalid mesh file extension: %s" % ext)
######################################################################################
# Compute AABB
######################################################################################
def aabb(mesh):
    """Return the axis-aligned bounding box of the mesh as (min_corner, max_corner)."""
    positions = mesh.v_pos
    return positions.min(dim=0).values, positions.max(dim=0).values
######################################################################################
# Compute unique edge list from attribute/vertex index list
######################################################################################
def compute_edges(attr_idx, return_inverse=False):
    """Return the unique undirected edges of a triangle index list.

    Args:
        attr_idx: [F, 3] integer tensor of per-face attribute/vertex indices.
        return_inverse: Also return, for each input edge, its row in the output.

    Returns:
        [E, 2] tensor of unique edges with the smaller index first (and the
        inverse mapping when requested).
    """
    with torch.no_grad():
        # Each triangle (a, b, c) contributes edges (a,b), (b,c), (c,a).
        paired = attr_idx[:, [0, 1, 1, 2, 2, 0]].reshape(-1, 2)
        # Canonicalize each edge so the smaller index comes first.
        canonical = torch.sort(paired, dim=1).values
        # Deduplicate (optionally with the per-input-edge inverse mapping).
        return torch.unique(canonical, dim=0, return_inverse=return_inverse)
######################################################################################
# Compute unique edge to face mapping from attribute/vertex index list
######################################################################################
def compute_edge_to_face_mapping(attr_idx, return_inverse=False):
    """Map each unique edge to the (up to) two triangles that share it.

    Args:
        attr_idx: [F, 3] integer tensor of per-face attribute/vertex indices.
        return_inverse: Unused; kept for interface compatibility.

    Returns:
        [E, 2] int64 tensor: column 0 holds the triangle that traverses the
        edge in ascending index order, column 1 the one traversing it
        descending. Unset slots remain 0.
    """
    with torch.no_grad():
        # Create all edges, packed by triangle
        all_edges = torch.cat((
            torch.stack((attr_idx[:, 0], attr_idx[:, 1]), dim=-1),
            torch.stack((attr_idx[:, 1], attr_idx[:, 2]), dim=-1),
            torch.stack((attr_idx[:, 2], attr_idx[:, 0]), dim=-1),
        ), dim=-1).view(-1, 2)

        # Swap edge order so min index is always first
        order = (all_edges[:, 0] > all_edges[:, 1]).long().unsqueeze(dim=1)
        sorted_edges = torch.cat((
            torch.gather(all_edges, 1, order),
            torch.gather(all_edges, 1, 1 - order)
        ), dim=-1)

        # Eliminate duplicates and get the inverse mapping
        unique_edges, idx_map = torch.unique(sorted_edges, dim=0, return_inverse=True)

        # Fix: follow the input tensor's device instead of hard-coding .cuda(),
        # which both generalizes to CPU tensors and avoids device transfers.
        device = attr_idx.device
        tris = torch.arange(attr_idx.shape[0], device=device).repeat_interleave(3)
        tris_per_edge = torch.zeros((unique_edges.shape[0], 2), dtype=torch.int64, device=device)

        # Compute edge to face table: ascending edges fill column 0,
        # descending edges fill column 1.
        mask0 = order[:, 0] == 0
        mask1 = order[:, 0] == 1
        tris_per_edge[idx_map[mask0], 0] = tris[mask0]
        tris_per_edge[idx_map[mask1], 1] = tris[mask1]

        return tris_per_edge
######################################################################################
# Align base mesh to reference mesh: move & rescale to match bounding boxes.
######################################################################################
def unit_size(mesh):
    """Return a copy of `mesh` centered on the origin and scaled so its longest AABB side is 2."""
    with torch.no_grad():
        vmin, vmax = aabb(mesh)
        center = (vmax + vmin) / 2
        scale = 2 / torch.max(vmax - vmin).item()
        # Center first, then rescale to unit size.
        return Mesh((mesh.v_pos - center) * scale, base=mesh)
######################################################################################
# Center & scale mesh for rendering
######################################################################################
def center_by_reference(base_mesh, ref_aabb, scale):
    """Translate/scale base_mesh so the reference AABB maps to a centered box of size `scale`."""
    ref_min, ref_max = ref_aabb[0], ref_aabb[1]
    center = (ref_min + ref_max) * 0.5
    factor = scale / torch.max(ref_max - ref_min).item()
    return Mesh((base_mesh.v_pos - center[None, ...]) * factor, base=base_mesh)
######################################################################################
# Simple smooth vertex normal computation
######################################################################################
def auto_normals(imesh):
    """Compute smooth per-vertex normals by (area-weighted) averaging of face normals.

    Args:
        imesh: Mesh with v_pos and t_pos_idx set.

    Returns:
        A new Mesh sharing imesh's data, with v_nrm/t_nrm_idx replaced by the
        computed smooth normals (indexed like the position faces).
    """
    i0 = imesh.t_pos_idx[:, 0]
    i1 = imesh.t_pos_idx[:, 1]
    i2 = imesh.t_pos_idx[:, 2]

    v0 = imesh.v_pos[i0, :]
    v1 = imesh.v_pos[i1, :]
    v2 = imesh.v_pos[i2, :]

    # Fix: pass dim=-1 explicitly. torch.cross without `dim` picks the *first*
    # dimension of size 3, which silently selects the face dimension when a
    # mesh happens to have exactly 3 faces (and the implicit dim is deprecated).
    face_normals = torch.cross(v1 - v0, v2 - v0, dim=-1)

    # Splat face normals to vertices
    v_nrm = torch.zeros_like(imesh.v_pos)
    v_nrm.scatter_add_(0, i0[:, None].repeat(1,3), face_normals)
    v_nrm.scatter_add_(0, i1[:, None].repeat(1,3), face_normals)
    v_nrm.scatter_add_(0, i2[:, None].repeat(1,3), face_normals)

    # Normalize, replace zero (degenerated) normals with some default value.
    # Use the mesh's own device rather than hard-coding 'cuda'.
    v_nrm = torch.where(util.dot(v_nrm, v_nrm) > 1e-20, v_nrm, torch.tensor([0.0, 0.0, 1.0], dtype=torch.float32, device=v_nrm.device))
    v_nrm = util.safe_normalize(v_nrm)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(v_nrm))

    return Mesh(v_nrm=v_nrm, t_nrm_idx=imesh.t_pos_idx, base=imesh)
######################################################################################
# Compute tangent space from texture map coordinates
# Follows http://www.mikktspace.com/ conventions
######################################################################################
def compute_tangents(imesh):
    """Compute per-vertex tangents from UV coordinates.

    Follows http://www.mikktspace.com/ conventions. Requires v_pos/t_pos_idx,
    v_tex/t_tex_idx and v_nrm/t_nrm_idx to be present on `imesh`; returns a new
    Mesh with v_tng set and t_tng_idx aliased to t_nrm_idx.
    """
    vn_idx = [None] * 3
    pos = [None] * 3
    tex = [None] * 3
    # Gather per-corner positions, texcoords and normal indices.
    for i in range(0,3):
        pos[i] = imesh.v_pos[imesh.t_pos_idx[:, i]]
        tex[i] = imesh.v_tex[imesh.t_tex_idx[:, i]]
        vn_idx[i] = imesh.t_nrm_idx[:, i]

    tangents = torch.zeros_like(imesh.v_nrm)
    tansum = torch.zeros_like(imesh.v_nrm)

    # Compute tangent space for each triangle: UV-space and position-space edges.
    uve1 = tex[1] - tex[0]
    uve2 = tex[2] - tex[0]
    pe1 = pos[1] - pos[0]
    pe2 = pos[2] - pos[0]

    nom = (pe1 * uve2[..., 1:2] - pe2 * uve1[..., 1:2])
    denom = (uve1[..., 0:1] * uve2[..., 1:2] - uve1[..., 1:2] * uve2[..., 0:1])

    # Avoid division by zero for degenerated texture coordinates
    tang = nom / torch.where(denom > 0.0, torch.clamp(denom, min=1e-6), torch.clamp(denom, max=-1e-6))

    # Update all 3 vertices: accumulate tangents and a per-vertex count.
    for i in range(0,3):
        idx = vn_idx[i][:, None].repeat(1,3)
        tangents.scatter_add_(0, idx, tang) # tangents[n_i] = tangents[n_i] + tang
        tansum.scatter_add_(0, idx, torch.ones_like(tang)) # tansum[n_i] = tansum[n_i] + 1
    # Average the accumulated tangents per vertex.
    tangents = tangents / tansum

    # Normalize and make sure tangent is perpendicular to normal (Gram-Schmidt).
    tangents = util.safe_normalize(tangents)
    tangents = util.safe_normalize(tangents - util.dot(tangents, imesh.v_nrm) * imesh.v_nrm)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(tangents))

    return Mesh(v_tng=tangents, t_tng_idx=imesh.t_nrm_idx, base=imesh)

111
render/mlptexture.py Normal file
View File

@@ -0,0 +1,111 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import numpy as np
# import tinycudann as tcnn
from gridencoder import GridEncoder
#######################################################################################################################################################
# Small MLP using PyTorch primitives, internal helper class
#######################################################################################################################################################
class _MLP(torch.nn.Module):
    """Small fully-connected ReLU network (bias-free), internal helper.

    `cfg` supplies 'n_input_dims', 'n_output_dims', 'n_hidden_layers' and
    'n_neurons'. The network is moved to CUDA on construction.
    """
    def __init__(self, cfg, loss_scale=1.0):
        super(_MLP, self).__init__()
        self.loss_scale = loss_scale  # kept for interface compatibility; scaling hook below is disabled
        layers = [torch.nn.Linear(cfg['n_input_dims'], cfg['n_neurons'], bias=False), torch.nn.ReLU()]
        for _ in range(cfg['n_hidden_layers'] - 1):
            layers.append(torch.nn.Linear(cfg['n_neurons'], cfg['n_neurons'], bias=False))
            layers.append(torch.nn.ReLU())
        layers.append(torch.nn.Linear(cfg['n_neurons'], cfg['n_output_dims'], bias=False))
        self.net = torch.nn.Sequential(*layers).cuda()
        self.net.apply(self._init_weights)

        # if self.loss_scale != 1.0:
        #     self.net.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] * self.loss_scale, ))

    def forward(self, x):
        # Cast to float32 in case the encoder emits half precision.
        return self.net(x.to(torch.float32))

    @staticmethod
    def _init_weights(m):
        # Kaiming init suited to the ReLU activations; zero any bias present.
        if type(m) == torch.nn.Linear:
            torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
            if hasattr(m.bias, 'data'):
                m.bias.data.fill_(0.0)
#######################################################################################################################################################
# Outward visible MLP class
#######################################################################################################################################################
class MLPTexture3D(torch.nn.Module):
    """Neural 3D texture: multiresolution hash-grid encoding + small MLP.

    Maps world-space positions inside `AABB` to `channels` material values,
    squashed per channel into the range given by `min_max`.
    """
    def __init__(self, AABB, channels = 9, internal_dims = 32, hidden = 2, min_max = None):
        super(MLPTexture3D, self).__init__()

        self.channels = channels
        self.internal_dims = internal_dims
        self.AABB = AABB
        # Per-channel (min, max) output range. NOTE(review): sample()
        # dereferences this unconditionally — None would crash there.
        self.min_max = min_max

        # Setup positional encoding, see https://github.com/NVlabs/tiny-cuda-nn for details
        desired_resolution = 4096
        base_grid_resolution = 16
        num_levels = 16
        # Geometric growth factor so the last level reaches desired_resolution.
        per_level_scale = np.exp(np.log(desired_resolution / base_grid_resolution) / (num_levels-1))

        # enc_cfg = {
        #     "otype": "HashGrid",
        #     "n_levels": num_levels,
        #     "n_features_per_level": 2,
        #     "log2_hashmap_size": 19,
        #     "base_resolution": base_grid_resolution,
        #     "per_level_scale" : per_level_scale
        # }
        # gradient_scaling = 128.0
        # self.encoder = tcnn.Encoding(3, enc_cfg)
        # self.encoder.register_full_backward_hook(lambda module, grad_i, grad_o: (grad_i[0] / gradient_scaling, ))
        # GridEncoder replaces tcnn's HashGrid (kept above for reference).
        self.encoder = GridEncoder(3, num_levels, base_resolution=base_grid_resolution, per_level_scale=per_level_scale).cuda()

        # Setup MLP
        mlp_cfg = {
            "n_input_dims" : self.encoder.output_dim,
            "n_output_dims" : self.channels,
            "n_hidden_layers" : hidden,
            "n_neurons" : self.internal_dims
        }
        self.net = _MLP(mlp_cfg)
        print("Encoder output: %d dims" % (self.encoder.output_dim))

    # Sample texture at a given location
    def sample(self, texc):
        # texc: [n, h, w, 3] world-space positions — TODO confirm shape at call sites
        # normalize coords into [0, 1] relative to the AABB
        _texc = (texc.view(-1, 3) - self.AABB[0][None, ...]) / (self.AABB[1][None, ...] - self.AABB[0][None, ...])
        _texc = torch.clamp(_texc, min=0, max=1)

        p_enc = self.encoder(_texc.contiguous())
        out = self.net.forward(p_enc)

        # Sigmoid limit and scale to the allowed range
        out = torch.sigmoid(out) * (self.min_max[1][None, :] - self.min_max[0][None, :]) + self.min_max[0][None, :]

        return out.view(*texc.shape[:-1], self.channels) # Remap to [n, h, w, 9]

    # In-place clamp with no derivative to make sure values are in valid range after training
    def clamp_(self):
        # Intentionally a no-op: the sigmoid in sample() already bounds outputs.
        pass

    def cleanup(self):
        # tcnn.free_temporary_memory()
        pass

179
render/obj.py Normal file
View File

@@ -0,0 +1,179 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import torch
from . import texture
from . import mesh
from . import material
######################################################################################
# Utility functions
######################################################################################
def _find_mat(materials, name):
for mat in materials:
if mat['name'] == name:
return mat
return materials[0] # Materials 0 is the default
######################################################################################
# Create mesh object from objfile
######################################################################################
def _parse_face_vertex(token):
    """Parse one OBJ face token ('v', 'v/vt', 'v//vn' or 'v/vt/vn') into
    0-based (pos, tex, nrm) indices; missing entries become -1."""
    attrs = token.split('/')
    v = int(attrs[0]) - 1
    t = int(attrs[1]) - 1 if len(attrs) > 1 and attrs[1] != "" else -1
    n = int(attrs[2]) - 1 if len(attrs) > 2 and attrs[2] != "" else -1
    return v, t, n

def load_obj(filename, clear_ks=True, mtl_override=None):
    """Create a Mesh (on the 'cuda' device) from a Wavefront .obj file.

    Polygons are fan-triangulated, multiple materials are merged into a single
    uber-material, and the V texture coordinate is flipped (1 - v).

    Args:
        filename: Path to the .obj file; mtllib paths resolve relative to it.
        clear_ks: Forwarded to material.load_mtl (zero the ks red channel).
        mtl_override: Optional .mtl path replacing the file's material library.

    Returns:
        mesh.Mesh with positions/normals/texcoords and the merged material.
    """
    obj_path = os.path.dirname(filename)

    # Read entire file
    with open(filename, 'r') as f:
        lines = f.readlines()

    # Load materials; index 0 is the fallback default material.
    all_materials = [
        {
            'name' : '_default_mat',
            'bsdf' : 'pbr',
            'kd' : texture.Texture2D(torch.tensor([0.5, 0.5, 0.5], dtype=torch.float32, device='cuda')),
            'ks' : texture.Texture2D(torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32, device='cuda'))
        }
    ]
    if mtl_override is None:
        for line in lines:
            if len(line.split()) == 0:
                continue
            if line.split()[0] == 'mtllib':
                all_materials += material.load_mtl(os.path.join(obj_path, line.split()[1]), clear_ks) # Read in entire material library
    else:
        all_materials += material.load_mtl(mtl_override)

    # load vertices
    vertices, texcoords, normals = [], [], []
    for line in lines:
        if len(line.split()) == 0:
            continue

        prefix = line.split()[0].lower()
        if prefix == 'v':
            vertices.append([float(v) for v in line.split()[1:]])
        elif prefix == 'vt':
            val = [float(v) for v in line.split()[1:]]
            texcoords.append([val[0], 1.0 - val[1]])  # flip V to match texture convention
        elif prefix == 'vn':
            normals.append([float(v) for v in line.split()[1:]])

    # load faces
    activeMatIdx = None
    used_materials = []
    faces, tfaces, nfaces, mfaces = [], [], [], []
    for line in lines:
        if len(line.split()) == 0:
            continue

        prefix = line.split()[0].lower()
        if prefix == 'usemtl': # Track used materials
            mat = _find_mat(all_materials, line.split()[1])
            if not mat in used_materials:
                used_materials.append(mat)
            activeMatIdx = used_materials.index(mat)
        elif prefix == 'f': # Parse face
            vs = line.split()[1:]
            nv = len(vs)
            # Fix: use a guarded parser. The previous inline `vv[1]`/`vv[2]`
            # indexing crashed on legal OBJ faces like `f 1 2 3` or `f 1/2 ...`
            # that omit texcoord/normal slots.
            v0, t0, n0 = _parse_face_vertex(vs[0])
            for i in range(nv - 2): # Triangulate polygons (fan around vertex 0)
                v1, t1, n1 = _parse_face_vertex(vs[i + 1])
                v2, t2, n2 = _parse_face_vertex(vs[i + 2])
                mfaces.append(activeMatIdx)
                faces.append([v0, v1, v2])
                tfaces.append([t0, t1, t2])
                nfaces.append([n0, n1, n2])
    assert len(tfaces) == len(faces) and len(nfaces) == len(faces)

    # Create an "uber" material by combining all textures into a larger texture
    if len(used_materials) > 1:
        uber_material, texcoords, tfaces = material.merge_materials(used_materials, texcoords, tfaces, mfaces)
    else:
        # Fix: fall back to the default material when the file has no usemtl
        # statements (previously used_materials[0] raised IndexError).
        uber_material = used_materials[0] if used_materials else all_materials[0]

    vertices = torch.tensor(vertices, dtype=torch.float32, device='cuda')
    texcoords = torch.tensor(texcoords, dtype=torch.float32, device='cuda') if len(texcoords) > 0 else None
    normals = torch.tensor(normals, dtype=torch.float32, device='cuda') if len(normals) > 0 else None

    faces = torch.tensor(faces, dtype=torch.int64, device='cuda')
    tfaces = torch.tensor(tfaces, dtype=torch.int64, device='cuda') if texcoords is not None else None
    nfaces = torch.tensor(nfaces, dtype=torch.int64, device='cuda') if normals is not None else None

    print(f'[load_obj] vertices: {vertices.shape}, faces: {faces.shape}')
    print(f'[load_obj] texcoords: {texcoords.shape if texcoords is not None else "None"}')
    return mesh.Mesh(vertices, faces, normals, nfaces, texcoords, tfaces, material=uber_material)
######################################################################################
# Save mesh object to objfile
######################################################################################
def write_obj(folder, mesh, save_material=True):
    """Write `mesh` to <folder>/mesh.obj, optionally with its material as mesh.mtl.

    Indices are converted to 1-based OBJ indices and the V texcoord is flipped
    back (load_obj stores 1 - v). Missing texcoord/normal indices are written
    as empty slots in the f-records.
    """
    obj_file = os.path.join(folder, 'mesh.obj')
    print("Writing mesh: ", obj_file)
    with open(obj_file, "w") as f:
        f.write("mtllib mesh.mtl\n")
        f.write("g default\n")

        v_pos = mesh.v_pos.detach().cpu().numpy() if mesh.v_pos is not None else None
        v_nrm = mesh.v_nrm.detach().cpu().numpy() if mesh.v_nrm is not None else None
        v_tex = mesh.v_tex.detach().cpu().numpy() if mesh.v_tex is not None else None

        t_pos_idx = mesh.t_pos_idx.detach().cpu().numpy() if mesh.t_pos_idx is not None else None
        t_nrm_idx = mesh.t_nrm_idx.detach().cpu().numpy() if mesh.t_nrm_idx is not None else None
        t_tex_idx = mesh.t_tex_idx.detach().cpu().numpy() if mesh.t_tex_idx is not None else None

        print("    writing %d vertices" % len(v_pos))
        for v in v_pos:
            f.write('v {} {} {} \n'.format(v[0], v[1], v[2]))

        if v_tex is not None:
            print("    writing %d texcoords" % len(v_tex))
            assert(len(t_pos_idx) == len(t_tex_idx))
            for v in v_tex:
                # Undo the V flip applied at load time.
                f.write('vt {} {} \n'.format(v[0], 1.0 - v[1]))

        if v_nrm is not None:
            print("    writing %d normals" % len(v_nrm))
            assert(len(t_pos_idx) == len(t_nrm_idx))
            for v in v_nrm:
                f.write('vn {} {} {}\n'.format(v[0], v[1], v[2]))

        # faces
        f.write("s 1 \n")
        f.write("g pMesh1\n")
        f.write("usemtl defaultMat\n")

        # Write faces
        print("    writing %d faces" % len(t_pos_idx))
        for i in range(len(t_pos_idx)):
            f.write("f ")
            for j in range(3):
                # OBJ indices are 1-based; omit tex/normal slots when absent.
                f.write(' %s/%s/%s' % (str(t_pos_idx[i][j]+1), '' if v_tex is None else str(t_tex_idx[i][j]+1), '' if v_nrm is None else str(t_nrm_idx[i][j]+1)))
            f.write("\n")

    if save_material:
        mtl_file = os.path.join(folder, 'mesh.mtl')
        print("Writing material: ", mtl_file)
        material.save_mtl(mtl_file, mesh.material)

    print("Done exporting mesh")

82
render/regularizer.py Normal file
View File

@@ -0,0 +1,82 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import nvdiffrast.torch as dr
from . import util
from . import mesh
######################################################################################
# Computes a stochastic image gradient, useful for kd/ks smoothness losses
######################################################################################
def image_grad(buf, std=0.01):
    """Estimate local image variation by comparing each pixel to a jittered tap.

    `buf` is an NHWC texture whose last channel acts as a weight/alpha; the
    returned buffer holds per-pixel absolute differences of the remaining
    channels, masked by both taps' weights.
    """
    height, width = buf.shape[1], buf.shape[2]
    # Pixel-center grid in [-1, 1]; meshgrid default matches indexing='ij'
    # (the explicit kwarg is left commented for older-torch compatibility).
    t, s = torch.meshgrid(torch.linspace(-1.0 + 1.0 / height, 1.0 - 1.0 / height, height, device="cuda"),
                          torch.linspace(-1.0 + 1.0 / width, 1.0 - 1.0 / width, width, device="cuda"),
                          ) # indexing='ij')
    jitter = torch.normal(mean=0, std=std, size=(buf.shape[0], height, width, 2), device="cuda")
    tc = jitter + torch.stack((s, t), dim=-1)[None, ...]
    tap = dr.texture(buf, tc, filter_mode='linear', boundary_mode='clamp')
    return torch.abs(tap[..., :-1] - buf[..., :-1]) * tap[..., -1:] * buf[..., -1:]
######################################################################################
# Computes the average edge length of a mesh.
# Rough estimate of the tessellation of a mesh. Can be used e.g. to clamp gradients
######################################################################################
def avg_edge_length(v_pos, t_pos_idx):
    """Mean Euclidean length over the mesh's unique edges."""
    edges = mesh.compute_edges(t_pos_idx)
    lengths = util.length(v_pos[edges[:, 0]] - v_pos[edges[:, 1]])
    return lengths.mean()
######################################################################################
# Laplacian regularization using umbrella operator (Fujiwara / Desbrun).
# https://mgarland.org/class/geom04/material/smoothing.pdf
######################################################################################
def laplace_regularizer_const(v_pos, t_pos_idx):
    """Mean squared uniform-Laplacian magnitude over all vertices.

    For each triangle corner, the two edge vectors pointing away from that
    corner are scatter-added into the corner's accumulator; each face also
    contributes a weight of 2 per corner. The per-vertex sum divided by its
    weight is the umbrella-operator Laplacian.
    """
    term = torch.zeros_like(v_pos)
    norm = torch.zeros_like(v_pos[..., 0:1])

    corners = [v_pos[t_pos_idx[:, c], :] for c in range(3)]
    weight = torch.ones_like(corners[0]) * 2.0

    for c in range(3):
        others = [corners[i] for i in range(3) if i != c]
        delta = (others[0] - corners[c]) + (others[1] - corners[c])
        idx = t_pos_idx[:, c:c + 1]
        term.scatter_add_(0, idx.repeat(1, 3), delta)
        norm.scatter_add_(0, idx, weight)

    # clamp guards vertices not referenced by any face (weight 0).
    term = term / torch.clamp(norm, min=1.0)
    return torch.mean(term ** 2)
######################################################################################
# Smooth vertex normals
######################################################################################
def normal_consistency(v_pos, t_pos_idx):
    """Penalize creases: mean disagreement between face normals across edges.

    Args:
        v_pos: (V, 3) vertex positions.
        t_pos_idx: (F, 3) triangle vertex indices.

    Returns:
        Scalar tensor in [0, 1]; 0 when all adjacent face normals agree.
    """
    # Compute face normals
    v0 = v_pos[t_pos_idx[:, 0], :]
    v1 = v_pos[t_pos_idx[:, 1], :]
    v2 = v_pos[t_pos_idx[:, 2], :]

    # dim=-1 made explicit: torch.cross's implicit dim selection is deprecated;
    # it would pick the first size-3 dim, which is this same axis for (F, 3).
    face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0, dim=-1))

    tris_per_edge = mesh.compute_edge_to_face_mapping(t_pos_idx)

    # Fetch normals for both faces sharing an edge
    n0 = face_normals[tris_per_edge[:, 0], :]
    n1 = face_normals[tris_per_edge[:, 1], :]

    # Compute error metric based on normal difference; clamp keeps the cosine
    # in valid range despite float error in the normalization.
    term = torch.clamp(util.dot(n0, n1), min=-1.0, max=1.0)
    term = (1.0 - term) * 0.5

    return torch.mean(torch.abs(term))

311
render/render.py Normal file
View File

@@ -0,0 +1,311 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import numpy as np
import nvdiffrast.torch as dr
from . import util
from . import renderutils as ru
from . import light
# ==============================================================================================
# Helper functions
# ==============================================================================================
def interpolate(attr, rast, attr_idx, rast_db=None):
    """Interpolate per-vertex `attr` over rasterized pixels via nvdiffrast.

    When rasterizer derivatives are supplied, attribute derivatives are
    requested for all attributes (needed for mip-mapped texture lookups).
    """
    diff_attrs = None if rast_db is None else 'all'
    return dr.interpolate(attr.contiguous(), rast, attr_idx, rast_db=rast_db, diff_attrs=diff_attrs)
# ==============================================================================================
# pixel shader
# ==============================================================================================
def shade(
        gb_pos, gb_mask,
        gb_geometric_normal,
        gb_normal,
        gb_tangent,
        gb_texc,
        gb_texc_deriv,
        view_pos,
        lgt,
        material,
        bsdf
    ):
    """Per-pixel shading of interpolated G-buffer attributes.

    The gb_* tensors come from render_layer (presumably NHWC image buffers --
    TODO confirm shapes against the interpolate() calls there). `material` is
    a dict: if it holds a combined 'kd_ks_normal' MLP texture, kd/ks/normal
    are sampled from world-space position; otherwise classic 2D textures are
    sampled via UVs. `bsdf`, when not None, overrides material['bsdf'].

    Returns a dict of 4-channel buffers: 'shaded', 'kd_grad', 'occlusion',
    'normal' (each with alpha/mask in the last channel).
    """

    ################################################################################
    # Texture lookups
    ################################################################################
    perturbed_nrm = None
    if 'kd_ks_normal' in material:
        # Combined texture, used for MLPs because lookups are expensive.
        # A second, jittered sample gives a stochastic smoothness estimate
        # for the kd regularizer below.
        all_tex_jitter = material['kd_ks_normal'].sample(gb_pos + torch.normal(mean=0, std=0.01, size=gb_pos.shape, device="cuda"))
        all_tex = material['kd_ks_normal'].sample(gb_pos)
        assert all_tex.shape[-1] == 9 or all_tex.shape[-1] == 10, "Combined kd_ks_normal must be 9 or 10 channels"
        # Channel layout: [kd (3 or 4) | ks (3) | normal (3)].
        kd, ks, perturbed_nrm = all_tex[..., :-6], all_tex[..., -6:-3], all_tex[..., -3:]
        # Compute albedo (kd) gradient, used for material regularizer
        kd_grad = torch.sum(torch.abs(all_tex_jitter[..., :-6] - all_tex[..., :-6]), dim=-1, keepdim=True) / 3
    else:
        # Classic 2D texture path: sample by UV with derivatives for mip selection.
        kd_jitter = material['kd'].sample(gb_texc + torch.normal(mean=0, std=0.005, size=gb_texc.shape, device="cuda"), gb_texc_deriv)
        kd = material['kd'].sample(gb_texc, gb_texc_deriv)
        ks = material['ks'].sample(gb_texc, gb_texc_deriv)[..., 0:3] # skip alpha
        if 'normal' in material:
            perturbed_nrm = material['normal'].sample(gb_texc, gb_texc_deriv)
        kd_grad = torch.sum(torch.abs(kd_jitter[..., 0:3] - kd[..., 0:3]), dim=-1, keepdim=True) / 3

    # Separate kd into alpha and color, default alpha = 1
    alpha = kd[..., 3:4] if kd.shape[-1] == 4 else torch.ones_like(kd[..., 0:1])
    kd = kd[..., 0:3]

    ################################################################################
    # Normal perturbation & normal bend
    ################################################################################
    if 'no_perturbed_nrm' in material and material['no_perturbed_nrm']:
        perturbed_nrm = None

    gb_normal = ru.prepare_shading_normal(gb_pos, view_pos, perturbed_nrm, gb_normal, gb_tangent, gb_geometric_normal, two_sided_shading=True, opengl=True)

    ################################################################################
    # Evaluate BSDF
    ################################################################################
    assert 'bsdf' in material or bsdf is not None, "Material must specify a BSDF type"
    bsdf = material['bsdf'] if bsdf is None else bsdf
    if bsdf == 'pbr':
        if isinstance(lgt, light.EnvironmentLight):
            shaded_col = lgt.shade(gb_pos, gb_normal, kd, ks, view_pos, specular=True)
        else:
            assert False, "Invalid light type"
    elif bsdf == 'diffuse':
        if isinstance(lgt, light.EnvironmentLight):
            shaded_col = lgt.shade(gb_pos, gb_normal, kd, ks, view_pos, specular=False)
        else:
            assert False, "Invalid light type"
    elif bsdf == 'normal':
        # Debug views: remap [-1, 1] vectors to [0, 1] colors.
        shaded_col = (gb_normal + 1.0)*0.5
    elif bsdf == 'tangent':
        shaded_col = (gb_tangent + 1.0)*0.5
    elif bsdf == 'kd':
        shaded_col = kd
    elif bsdf == 'ks':
        shaded_col = ks
    else:
        assert False, "Invalid BSDF '%s'" % bsdf

    # Return multiple buffers
    buffers = {
        'shaded'    : torch.cat((shaded_col, alpha), dim=-1),
        'kd_grad'   : torch.cat((kd_grad, alpha), dim=-1),
        # NOTE(review): 'occlusion' exposes ks channel 0 -- presumably the
        # ambient-occlusion/spec-weight channel of the ks texture; confirm.
        'occlusion' : torch.cat((ks[..., :1], alpha), dim=-1),
        'normal'    : torch.cat(((gb_normal + 1.0) * 0.5, gb_mask), dim=-1),
    }
    return buffers
# ==============================================================================================
# Render a depth slice of the mesh (scene), some limitations:
#   - Single mesh
#   - Single light
#   - Single material
# ==============================================================================================
def render_layer(
        rast,
        rast_deriv,
        mesh,
        view_pos,
        lgt,
        resolution,
        spp,
        msaa,
        bsdf
    ):
    """Shade a single rasterized depth layer of `mesh`.

    Args:
        rast, rast_deriv: nvdiffrast rasterizer output (and derivatives) at
            full (spp-scaled) resolution.
        mesh: mesh providing v_pos/v_nrm/v_tng/v_tex and index buffers.
        view_pos: camera position tensor, broadcastable over the image.
        lgt: light object forwarded to shade().
        resolution: (H, W) base shading resolution.
        spp: samples per pixel; >1 enables supersampling.
        msaa: when True (and spp > 1), shade at base resolution and upsample.
        bsdf: optional BSDF-name override forwarded to shade().

    Returns:
        Dict of image buffers from shade(), at full (spp-scaled) resolution.
    """
    full_res = [resolution[0]*spp, resolution[1]*spp]

    ################################################################################
    # Rasterize
    ################################################################################

    # Scale down to shading resolution when MSAA is enabled, otherwise shade at full resolution
    if spp > 1 and msaa:
        rast_out_s = util.scale_img_nhwc(rast, resolution, mag='nearest', min='nearest')
        # Derivatives shrink with the image, so compensate by spp.
        rast_out_deriv_s = util.scale_img_nhwc(rast_deriv, resolution, mag='nearest', min='nearest') * spp
    else:
        rast_out_s = rast
        rast_out_deriv_s = rast_deriv

    ################################################################################
    # Interpolate attributes
    ################################################################################

    # Interpolate world space position
    gb_pos, _ = interpolate(mesh.v_pos[None, ...], rast_out_s, mesh.t_pos_idx.int())
    gb_mask, _ = interpolate(torch.ones_like(mesh.v_pos[None, ..., :1]), rast_out_s, mesh.t_pos_idx.int())

    # Compute geometric normals. We need those because of bent normals trick (for bump mapping)
    v0 = mesh.v_pos[mesh.t_pos_idx[:, 0], :]
    v1 = mesh.v_pos[mesh.t_pos_idx[:, 1], :]
    v2 = mesh.v_pos[mesh.t_pos_idx[:, 2], :]
    # dim=-1 made explicit: torch.cross's implicit dim selection is deprecated;
    # it would pick the first size-3 dim, which is this same axis for (F, 3).
    face_normals = util.safe_normalize(torch.cross(v1 - v0, v2 - v0, dim=-1))
    face_normal_indices = (torch.arange(0, face_normals.shape[0], dtype=torch.int64, device='cuda')[:, None]).repeat(1, 3)
    gb_geometric_normal, _ = interpolate(face_normals[None, ...], rast_out_s, face_normal_indices.int())

    # Compute tangent space
    assert mesh.v_nrm is not None and mesh.v_tng is not None
    gb_normal, _ = interpolate(mesh.v_nrm[None, ...], rast_out_s, mesh.t_nrm_idx.int())
    gb_tangent, _ = interpolate(mesh.v_tng[None, ...], rast_out_s, mesh.t_tng_idx.int()) # Interpolate tangents

    # Texture coordinate
    assert mesh.v_tex is not None
    gb_texc, gb_texc_deriv = interpolate(mesh.v_tex[None, ...], rast_out_s, mesh.t_tex_idx.int(), rast_db=rast_out_deriv_s)

    ################################################################################
    # Shade
    ################################################################################

    buffers = shade(gb_pos, gb_mask, gb_geometric_normal, gb_normal, gb_tangent, gb_texc, gb_texc_deriv, view_pos, lgt, mesh.material, bsdf)

    ################################################################################
    # Prepare output
    ################################################################################

    # Scale back up to visibility resolution if using MSAA
    if spp > 1 and msaa:
        for key in buffers.keys():
            buffers[key] = util.scale_img_nhwc(buffers[key], full_res, mag='nearest', min='nearest')

    # Return buffers
    return buffers
# ==============================================================================================
# Render a depth peeled mesh (scene), some limitations:
#   - Single mesh
#   - Single light
#   - Single material
# ==============================================================================================
def render_mesh(
        ctx,
        mesh,
        mtx_in,     # mvp, [B, 4, 4]
        view_pos,   # cam pos, [B, 3]
        lgt,
        resolution, # [2] (check the custom collate in dataset/dataset.py)
        spp         = 1,
        num_layers  = 1, # always 1
        msaa        = False,
        background  = None,
        bsdf        = None
    ):
    """Rasterize, depth-peel, shade and composite `mesh` into image buffers.

    Returns a dict of buffers (keys from shade(): 'shaded', 'kd_grad',
    'occlusion', 'normal'), each composited over the background and averaged
    back to `resolution` when spp > 1. Requires a CUDA device.
    """
    def prepare_input_vector(x):
        # Promote to tensor and reshape [B, 3] -> [B, 1, 1, 3] so it
        # broadcasts over image pixels inside shade().
        x = torch.tensor(x, dtype=torch.float32, device='cuda') if not torch.is_tensor(x) else x
        return x[:, None, None, :] if len(x.shape) == 2 else x

    def composite_buffer(key, layers, background, antialias):
        # Alpha-composite the peeled layers back-to-front over `background`;
        # optional edge antialiasing against the clip-space geometry.
        accum = background
        for buffers, rast in reversed(layers):
            alpha = (rast[..., -1:] > 0).float() * buffers[key][..., -1:]
            accum = torch.lerp(accum, torch.cat((buffers[key][..., :-1], torch.ones_like(buffers[key][..., -1:])), dim=-1), alpha)
            if antialias:
                accum = dr.antialias(accum.contiguous(), rast, v_pos_clip, mesh.t_pos_idx.int())
        return accum

    assert mesh.t_pos_idx.shape[0] > 0, "Got empty training triangle mesh (unrecoverable discontinuity)"
    assert background is None or (background.shape[1] == resolution[0] and background.shape[2] == resolution[1])

    full_res = [resolution[0]*spp, resolution[1]*spp]

    # Convert numpy arrays to torch tensors
    mtx_in   = torch.tensor(mtx_in, dtype=torch.float32, device='cuda') if not torch.is_tensor(mtx_in) else mtx_in
    view_pos = prepare_input_vector(view_pos)  # [B, 1, 1, 3]

    # clip space transform
    v_pos_clip = ru.xfm_points(mesh.v_pos[None, ...], mtx_in)  # just the mvp transform, [1, N, 3]

    # Render all layers front-to-back
    layers = []
    with dr.DepthPeeler(ctx, v_pos_clip, mesh.t_pos_idx.int(), full_res) as peeler:
        for _ in range(num_layers):
            rast, db = peeler.rasterize_next_layer()
            layers += [(render_layer(rast, db, mesh, view_pos, lgt, resolution, spp, msaa, bsdf), rast)]

    # Setup background: pad to RGBA with zero alpha so compositing treats it
    # as fully behind the geometry.
    if background is not None:
        if spp > 1:
            background = util.scale_img_nhwc(background, full_res, mag='nearest', min='nearest')
        background = torch.cat((background, torch.zeros_like(background[..., 0:1])), dim=-1)
    else:
        background = torch.zeros(1, full_res[0], full_res[1], 4, dtype=torch.float32, device='cuda')

    # Composite layers front-to-back; only color-like buffers get antialiasing.
    out_buffers = {}
    for key in layers[0][0].keys():
        if key == 'shaded':
            accum = composite_buffer(key, layers, background, True)
        elif key == 'normal':
            accum = composite_buffer(key, layers, torch.zeros_like(layers[0][0][key]), True)
        else:
            accum = composite_buffer(key, layers, torch.zeros_like(layers[0][0][key]), False)

        # Downscale to framebuffer resolution. Use avg pooling
        out_buffers[key] = util.avg_pool_nhwc(accum, spp) if spp > 1 else accum

    return out_buffers
# ==============================================================================================
# Render UVs
# ==============================================================================================
def render_uv(ctx, mesh, resolution, mlp_texture):
    """Bake the MLP material into UV-space textures.

    Rasterizes the mesh's UV layout, samples the combined kd/ks/normal MLP at
    the interpolated world positions, then inpaints a small dilated border
    around the UV islands by copying from nearest covered texels (prevents
    bleeding of uninitialized texels when the baked maps are mip-mapped).

    Returns (mask, kd, ks, perturbed_nrm) in UV space.
    """
    # clip space transform: map UVs in [0, 1] to NDC [-1, 1]
    uv_clip = mesh.v_tex[None, ...]*2.0 - 1.0

    # pad to four component coordinate
    uv_clip4 = torch.cat((uv_clip, torch.zeros_like(uv_clip[...,0:1]), torch.ones_like(uv_clip[...,0:1])), dim = -1)

    # rasterize
    rast, _ = dr.rasterize(ctx, uv_clip4, mesh.t_tex_idx.int(), resolution)

    # Interpolate world space position
    gb_pos, _ = interpolate(mesh.v_pos[None, ...], rast, mesh.t_pos_idx.int())

    # Sample out textures from MLP
    all_tex = mlp_texture.sample(gb_pos)
    assert all_tex.shape[-1] == 9 or all_tex.shape[-1] == 10, "Combined kd_ks_normal must be 9 or 10 channels"

    mask = (rast[..., -1:] > 0).float()
    # Channel layout mirrors shade(): [kd | ks | normal].
    kd = all_tex[..., :-6]
    ks = all_tex[..., -6:-3]
    perturbed_nrm = util.safe_normalize(all_tex[..., -3:])

    # antialiasing
    # NOTE(review): third-party imports kept local so the sklearn/scipy
    # dependency is only required when baking textures.
    from sklearn.neighbors import NearestNeighbors
    from scipy.ndimage import binary_dilation, binary_erosion

    mask_np = mask.cpu().numpy() > 0
    # Texels just outside the UV islands that need filling.
    inpaint_region = binary_dilation(mask_np, iterations=3)
    inpaint_region[mask_np] = 0

    # Donor texels: a thin band just inside the island boundary.
    search_region = mask_np.copy()
    not_search_region = binary_erosion(search_region, iterations=2)
    search_region[not_search_region] = 0

    search_coords = np.stack(np.nonzero(search_region), axis=-1)
    inpaint_coords = np.stack(np.nonzero(inpaint_region), axis=-1)

    # Copy each border texel from its nearest valid neighbor.
    knn = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(search_coords)
    _, indices = knn.kneighbors(inpaint_coords)

    inpaint_coords = torch.from_numpy(inpaint_coords).long().to(kd.device)
    target_coords = torch.from_numpy(search_coords[indices[:, 0]]).long().to(kd.device)

    kd[tuple(inpaint_coords.T)] = kd[tuple(target_coords.T)]
    ks[tuple(inpaint_coords.T)] = ks[tuple(target_coords.T)]
    perturbed_nrm[tuple(inpaint_coords.T)] = perturbed_nrm[tuple(target_coords.T)]

    return mask, kd, ks, perturbed_nrm

View File

@@ -0,0 +1,11 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
# Public API of the renderutils package, re-exported from .ops.
# Leading-underscore entries are exposed deliberately (used by tests/callers).
from .ops import xfm_points, xfm_vectors, image_loss, diffuse_cubemap, specular_cubemap, prepare_shading_normal, lambert, frostbite_diffuse, pbr_specular, pbr_bsdf, _fresnel_shlick, _ndf_ggx, _lambda_ggx, _masking_smith
__all__ = ["xfm_vectors", "xfm_points", "image_loss", "diffuse_cubemap","specular_cubemap", "prepare_shading_normal", "lambert", "frostbite_diffuse", "pbr_specular", "pbr_bsdf", "_fresnel_shlick", "_ndf_ggx", "_lambda_ggx", "_masking_smith", ]

151
render/renderutils/bsdf.py Normal file
View File

@@ -0,0 +1,151 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import math
import torch
# Grazing-angle cutoff: below this cosine between view vector and smooth
# normal, _bend_normal blends the shading normal back toward the geometric
# normal to avoid shading artifacts at silhouettes.
NORMAL_THRESHOLD = 0.1

################################################################################
# Vector utility functions
################################################################################
def _dot(x, y):
return torch.sum(x*y, -1, keepdim=True)
def _reflect(x, n):
    # Mirror x about normal n: r = 2 (x . n) n - x.
    # NOTE(review): assumes n is unit length -- callers normalize first.
    cos = _dot(x, n)
    return 2 * cos * n - x
def _safe_normalize(x):
return torch.nn.functional.normalize(x, dim = -1)
def _bend_normal(view_vec, smooth_nrm, geom_nrm, two_sided_shading):
    """Face normals toward the viewer and blend near grazing angles.

    With two-sided shading, both normals are flipped wherever the geometric
    normal points away from the viewer. Near silhouettes (cosine below
    NORMAL_THRESHOLD) the result is pulled toward the geometric normal.
    """
    if two_sided_shading:
        front_facing = _dot(geom_nrm, view_vec) > 0
        smooth_nrm = torch.where(front_facing, smooth_nrm, -smooth_nrm)
        geom_nrm = torch.where(front_facing, geom_nrm, -geom_nrm)

    blend = torch.clamp(_dot(view_vec, smooth_nrm) / NORMAL_THRESHOLD, min=0, max=1)
    return torch.lerp(geom_nrm, smooth_nrm, blend)
def _perturb_normal(perturbed_nrm, smooth_nrm, smooth_tng, opengl):
    """Apply a tangent-space normal `perturbed_nrm` in the (t, b, n) frame.

    OpenGL convention flips the bitangent (green channel). The z component is
    clamped to >= 0 so the perturbed normal cannot flip below the surface.
    """
    smooth_bitang = _safe_normalize(torch.cross(smooth_tng, smooth_nrm))
    along_nrm = smooth_nrm * torch.clamp(perturbed_nrm[..., 2:3], min=0.0)
    if opengl:
        shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] - smooth_bitang * perturbed_nrm[..., 1:2] + along_nrm
    else:
        shading_nrm = smooth_tng * perturbed_nrm[..., 0:1] + smooth_bitang * perturbed_nrm[..., 1:2] + along_nrm
    return _safe_normalize(shading_nrm)
def bsdf_prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
    """Produce the final shading normal: normalize the smooth frame, apply
    the tangent-space perturbation, then bend toward the viewer."""
    view_vec = _safe_normalize(view_pos - pos)
    shading_nrm = _perturb_normal(perturbed_nrm, _safe_normalize(smooth_nrm), _safe_normalize(smooth_tng), opengl)
    return _bend_normal(view_vec, shading_nrm, geom_nrm, two_sided_shading)
################################################################################
# Simple lambertian diffuse BSDF
################################################################################

def bsdf_lambert(nrm, wi):
    """Lambertian diffuse term: max(n . wi, 0) / pi."""
    cos_term = torch.clamp(_dot(nrm, wi), min=0.0)
    return cos_term / math.pi
################################################################################
# Frostbite diffuse (disney-style roughness-dependent diffuse)
################################################################################

def bsdf_frostbite(nrm, wi, wo, linearRoughness):
    """Frostbite diffuse BRDF; zero wherever either direction is backfacing."""
    cos_i = _dot(wi, nrm)
    cos_o = _dot(wo, nrm)

    half = _safe_normalize(wo + wi)
    cos_ih = _dot(wi, half)

    energyBias = 0.5 * linearRoughness
    energyFactor = 1.0 - (0.51 / 1.51) * linearRoughness
    # Roughness-dependent grazing response.
    f90 = energyBias + 2.0 * cos_ih * cos_ih * linearRoughness

    res = bsdf_fresnel_shlick(1.0, f90, cos_i) * bsdf_fresnel_shlick(1.0, f90, cos_o) * energyFactor
    return torch.where((cos_i > 0.0) & (cos_o > 0.0), res, torch.zeros_like(res))
################################################################################
# Phong specular, loosely based on mitsuba implementation
################################################################################

def bsdf_phong(nrm, wo, wi, N):
    """Normalized Phong lobe with exponent N, modulated by the diffuse cosine."""
    dp_r = torch.clamp(_dot(_reflect(wo, nrm), wi), min=0.0, max=1.0)
    dp_l = torch.clamp(_dot(nrm, wi), min=0.0, max=1.0)
    # (N + 2) / (2 pi) is the energy normalization of the Phong lobe.
    return (dp_r ** N) * dp_l * (N + 2) / (2 * math.pi)
################################################################################
# PBR's implementation of GGX specular
# Shared epsilon keeps all cosine clamps away from the exact 0/1 boundaries.
################################################################################

specular_epsilon = 1e-4

def bsdf_fresnel_shlick(f0, f90, cosTheta):
    """Schlick's Fresnel approximation between f0 (normal) and f90 (grazing)."""
    c = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
    weight = (1.0 - c) ** 5.0
    return f0 + (f90 - f0) * weight

def bsdf_ndf_ggx(alphaSqr, cosTheta):
    """GGX (Trowbridge-Reitz) normal distribution function."""
    c = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
    denom_root = (c * alphaSqr - c) * c + 1
    return alphaSqr / (denom_root * denom_root * math.pi)

def bsdf_lambda_ggx(alphaSqr, cosTheta):
    """Smith lambda term for GGX (used by the masking function)."""
    c = torch.clamp(cosTheta, min=specular_epsilon, max=1.0 - specular_epsilon)
    cos_sq = c * c
    tan_sq = (1.0 - cos_sq) / cos_sq
    return 0.5 * (torch.sqrt(1 + alphaSqr * tan_sq) - 1.0)

def bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO):
    """Height-correlated Smith masking-shadowing term."""
    lambdaI = bsdf_lambda_ggx(alphaSqr, cosThetaI)
    lambdaO = bsdf_lambda_ggx(alphaSqr, cosThetaO)
    return 1 / (1 + lambdaI + lambdaO)
def bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08):
    """Cook-Torrance GGX specular lobe: F * D * G / (4 n.wo), zeroed for
    backfacing configurations. `col` is the specular color (F0)."""
    clamped_alpha = torch.clamp(alpha, min=min_roughness*min_roughness, max=1.0)
    alphaSqr = clamped_alpha * clamped_alpha

    half = _safe_normalize(wo + wi)
    cos_o = _dot(wo, nrm)
    cos_i = _dot(wi, nrm)
    cos_oh = _dot(wo, half)
    cos_nh = _dot(nrm, half)

    D = bsdf_ndf_ggx(alphaSqr, cos_nh)
    G = bsdf_masking_smith_ggx_correlated(alphaSqr, cos_o, cos_i)
    F = bsdf_fresnel_shlick(col, 1, cos_oh)

    w = F * D * G * 0.25 / torch.clamp(cos_o, min=specular_epsilon)

    frontfacing = (cos_o > specular_epsilon) & (cos_i > specular_epsilon)
    return torch.where(frontfacing, w, torch.zeros_like(w))
def bsdf_pbr(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF):
    """Full PBR BSDF for a point light at `light_pos` seen from `view_pos`.

    `arm` packs (specular weight, roughness, metallic) in its last three
    channels. BSDF == 0 selects Lambert diffuse, otherwise Frostbite.
    """
    wo = _safe_normalize(view_pos - pos)
    wi = _safe_normalize(light_pos - pos)

    spec_str = arm[..., 0:1]   # x component
    roughness = arm[..., 1:2]  # y component
    metallic = arm[..., 2:3]   # z component

    # Metalness workflow: dielectrics get 4% F0, metals tint F0 by albedo.
    ks = (0.04 * (1.0 - metallic) + kd * metallic) * (1 - spec_str)
    diffuse_albedo = kd * (1.0 - metallic)

    if BSDF == 0:
        diffuse = diffuse_albedo * bsdf_lambert(nrm, wi)
    else:
        diffuse = diffuse_albedo * bsdf_frostbite(nrm, wi, wo, roughness)
    specular = bsdf_pbr_specular(ks, nrm, wo, wi, roughness*roughness, min_roughness=min_roughness)
    return diffuse + specular

View File

@@ -0,0 +1,710 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "common.h"
#include "bsdf.h"
#define SPECULAR_EPSILON 1e-4f
//------------------------------------------------------------------------
// Lambert functions
// fwd* evaluates the forward value; bwd* accumulates input gradients
// given the upstream gradient d_out (hand-written reverse-mode AD,
// mirroring the PyTorch reference in bsdf.py).

// Lambertian diffuse term: max(n.wi / pi, 0).
__device__ inline float fwdLambert(const vec3f nrm, const vec3f wi)
{
    return max(dot(nrm, wi) / M_PI, 0.0f);
}

// Gradient of fwdLambert; zero when the max() clamp was active (matches
// torch's clamp backward).
__device__ inline void bwdLambert(const vec3f nrm, const vec3f wi, vec3f& d_nrm, vec3f& d_wi, const float d_out)
{
    if (dot(nrm, wi) > 0.0f)
        bwdDot(nrm, wi, d_nrm, d_wi, d_out / M_PI);
}
//------------------------------------------------------------------------
// Fresnel Schlick
// Scalar and vec3f overloads. Cosine is clamped to
// [SPECULAR_EPSILON, 1 - SPECULAR_EPSILON]; gradients w.r.t. cosTheta are
// only propagated when the clamp was inactive (matches torch semantics).

__device__ inline float fwdFresnelSchlick(const float f0, const float f90, const float cosTheta)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float scale = powf(1.0f - _cosTheta, 5.0f);
    return f0 * (1.0f - scale) + f90 * scale;
}

__device__ inline void bwdFresnelSchlick(const float f0, const float f90, const float cosTheta, float& d_f0, float& d_f90, float& d_cosTheta, const float d_out)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float scale = pow(max(1.0f - _cosTheta, 0.0f), 5.0f);
    d_f0 += d_out * (1.0 - scale);
    d_f90 += d_out * scale;
    // No cosTheta gradient when the clamp saturated.
    if (cosTheta >= SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
    {
        d_cosTheta += d_out * (f90 - f0) * -5.0f * powf(1.0f - cosTheta, 4.0f);
    }
}

// vec3f overload: per-channel Fresnel (used with colored F0).
__device__ inline vec3f fwdFresnelSchlick(const vec3f f0, const vec3f f90, const float cosTheta)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float scale = powf(1.0f - _cosTheta, 5.0f);
    return f0 * (1.0f - scale) + f90 * scale;
}

__device__ inline void bwdFresnelSchlick(const vec3f f0, const vec3f f90, const float cosTheta, vec3f& d_f0, vec3f& d_f90, float& d_cosTheta, const vec3f d_out)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float scale = pow(max(1.0f - _cosTheta, 0.0f), 5.0f);
    d_f0 += d_out * (1.0 - scale);
    d_f90 += d_out * scale;
    if (cosTheta >= SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
    {
        // sum() reduces the per-channel contribution to the scalar cosTheta.
        d_cosTheta += sum(d_out * (f90 - f0) * -5.0f * powf(1.0f - cosTheta, 4.0f));
    }
}
//------------------------------------------------------------------------
// Frostbite diffuse
// Roughness-dependent diffuse BRDF; both forward and backward are zero
// when either direction is backfacing (matches the torch.where in the
// Python reference).

__device__ inline float fwdFrostbiteDiffuse(const vec3f nrm, const vec3f wi, const vec3f wo, float linearRoughness)
{
    float wiDotN = dot(wi, nrm);
    float woDotN = dot(wo, nrm);
    if (wiDotN > 0.0f && woDotN > 0.0f)
    {
        vec3f h = safeNormalize(wo + wi);
        float wiDotH = dot(wi, h);

        float energyBias = 0.5f * linearRoughness;
        float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
        float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
        float f0 = 1.f;

        float wiScatter = fwdFresnelSchlick(f0, f90, wiDotN);
        float woScatter = fwdFresnelSchlick(f0, f90, woDotN);

        return wiScatter * woScatter * energyFactor;
    }
    else return 0.0f;
}

// Recomputes the forward pass, then backpropagates through it step by step
// (each "Backprop:" comment names the forward statement being reversed).
__device__ inline void bwdFrostbiteDiffuse(const vec3f nrm, const vec3f wi, const vec3f wo, float linearRoughness, vec3f& d_nrm, vec3f& d_wi, vec3f& d_wo, float &d_linearRoughness, const float d_out)
{
    float wiDotN = dot(wi, nrm);
    float woDotN = dot(wo, nrm);

    if (wiDotN > 0.0f && woDotN > 0.0f)
    {
        vec3f h = safeNormalize(wo + wi);
        float wiDotH = dot(wi, h);

        float energyBias = 0.5f * linearRoughness;
        float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
        float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
        float f0 = 1.f;

        float wiScatter = fwdFresnelSchlick(f0, f90, wiDotN);
        float woScatter = fwdFresnelSchlick(f0, f90, woDotN);

        // -------------- BWD --------------
        // Backprop: return wiScatter * woScatter * energyFactor;
        float d_wiScatter = d_out * woScatter * energyFactor;
        float d_woScatter = d_out * wiScatter * energyFactor;
        float d_energyFactor = d_out * wiScatter * woScatter;

        // Backprop: float woScatter = fwdFresnelSchlick(f0, f90, woDotN);
        // d_f0 / d_f90 accumulate across both Fresnel calls.
        float d_woDotN = 0.0f, d_f0 = 0.0, d_f90 = 0.0f;
        bwdFresnelSchlick(f0, f90, woDotN, d_f0, d_f90, d_woDotN, d_woScatter);

        // Backprop: float wiScatter = fwdFresnelSchlick(fd0, fd90, wiDotN);
        float d_wiDotN = 0.0f;
        bwdFresnelSchlick(f0, f90, wiDotN, d_f0, d_f90, d_wiDotN, d_wiScatter);

        // Backprop: float f90 = energyBias + 2.f * wiDotH * wiDotH * linearRoughness;
        float d_energyBias = d_f90;
        float d_wiDotH = d_f90 * 4 * wiDotH * linearRoughness;
        d_linearRoughness += d_f90 * 2 * wiDotH * wiDotH;

        // Backprop: float energyFactor = 1.0f - (0.51f / 1.51f) * linearRoughness;
        d_linearRoughness -= (0.51f / 1.51f) * d_energyFactor;

        // Backprop: float energyBias = 0.5f * linearRoughness;
        d_linearRoughness += 0.5 * d_energyBias;

        // Backprop: float wiDotH = dot(wi, h);
        vec3f d_h(0);
        bwdDot(wi, h, d_wi, d_h, d_wiDotH);

        // Backprop: vec3f h = safeNormalize(wo + wi);
        vec3f d_wo_wi(0);
        bwdSafeNormalize(wo + wi, d_wo_wi, d_h);
        d_wi += d_wo_wi; d_wo += d_wo_wi;

        bwdDot(wo, nrm, d_wo, d_nrm, d_woDotN);
        bwdDot(wi, nrm, d_wi, d_nrm, d_wiDotN);
    }
}
//------------------------------------------------------------------------
// Ndf GGX
// GGX (Trowbridge-Reitz) normal distribution function and its gradient.

__device__ inline float fwdNdfGGX(const float alphaSqr, const float cosTheta)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1.0f;
    return alphaSqr / (d * d * M_PI);
}

__device__ inline void bwdNdfGGX(const float alphaSqr, const float cosTheta, float& d_alphaSqr, float& d_cosTheta, const float d_out)
{
    // Torch only back propagates if clamp doesn't trigger
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float cosThetaSqr = _cosTheta * _cosTheta;
    // Closed-form partial derivatives of alphaSqr / (d^2 * pi).
    d_alphaSqr += d_out * (1.0f - (alphaSqr + 1.0f) * cosThetaSqr) / (M_PI * powf((alphaSqr - 1.0) * cosThetaSqr + 1.0f, 3.0f));
    if (cosTheta > SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
    {
        d_cosTheta += d_out * -(4.0f * (alphaSqr - 1.0f) * alphaSqr * cosTheta) / (M_PI * powf((alphaSqr - 1.0) * cosThetaSqr + 1.0f, 3.0f));
    }
}
//------------------------------------------------------------------------
// Lambda GGX
// Smith lambda auxiliary term used by the masking-shadowing function.

__device__ inline float fwdLambdaGGX(const float alphaSqr, const float cosTheta)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float cosThetaSqr = _cosTheta * _cosTheta;
    float tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr;
    float res = 0.5f * (sqrtf(1.0f + alphaSqr * tanThetaSqr) - 1.0f);
    return res;
}

__device__ inline void bwdLambdaGGX(const float alphaSqr, const float cosTheta, float& d_alphaSqr, float& d_cosTheta, const float d_out)
{
    float _cosTheta = clamp(cosTheta, SPECULAR_EPSILON, 1.0f - SPECULAR_EPSILON);
    float cosThetaSqr = _cosTheta * _cosTheta;
    float tanThetaSqr = (1.0 - cosThetaSqr) / cosThetaSqr;
    float res = 0.5f * (sqrtf(1.0f + alphaSqr * tanThetaSqr) - 1.0f);
    d_alphaSqr += d_out * (0.25 * tanThetaSqr) / sqrtf(alphaSqr * tanThetaSqr + 1.0f);
    // cosTheta gradient only inside the clamp's linear region.
    if (cosTheta > SPECULAR_EPSILON && cosTheta < 1.0f - SPECULAR_EPSILON)
        d_cosTheta += d_out * -(0.5 * alphaSqr) / (powf(_cosTheta, 3.0f) * sqrtf(alphaSqr / cosThetaSqr - alphaSqr + 1.0f));
}
//------------------------------------------------------------------------
// Masking GGX
// Height-correlated Smith masking-shadowing: 1 / (1 + lambdaI + lambdaO).

__device__ inline float fwdMaskingSmithGGXCorrelated(const float alphaSqr, const float cosThetaI, const float cosThetaO)
{
    float lambdaI = fwdLambdaGGX(alphaSqr, cosThetaI);
    float lambdaO = fwdLambdaGGX(alphaSqr, cosThetaO);
    return 1.0f / (1.0f + lambdaI + lambdaO);
}

__device__ inline void bwdMaskingSmithGGXCorrelated(const float alphaSqr, const float cosThetaI, const float cosThetaO, float& d_alphaSqr, float& d_cosThetaI, float& d_cosThetaO, const float d_out)
{
    // FWD eval
    float lambdaI = fwdLambdaGGX(alphaSqr, cosThetaI);
    float lambdaO = fwdLambdaGGX(alphaSqr, cosThetaO);

    // BWD eval
    // d/dlambda of 1/(1+lI+lO) is the same for both lambdas.
    float d_lambdaIO = -d_out / powf(1.0f + lambdaI + lambdaO, 2.0f);
    bwdLambdaGGX(alphaSqr, cosThetaI, d_alphaSqr, d_cosThetaI, d_lambdaIO);
    bwdLambdaGGX(alphaSqr, cosThetaO, d_alphaSqr, d_cosThetaO, d_lambdaIO);
}
//------------------------------------------------------------------------
// GGX specular
// Cook-Torrance lobe F * D * G / (4 n.wo); zero for backfacing
// configurations. `col` is the specular color (F0).

__device__ vec3f fwdPbrSpecular(const vec3f col, const vec3f nrm, const vec3f wo, const vec3f wi, const float alpha, const float min_roughness)
{
    float _alpha = clamp(alpha, min_roughness * min_roughness, 1.0f);
    float alphaSqr = _alpha * _alpha;

    vec3f h = safeNormalize(wo + wi);
    float woDotN = dot(wo, nrm);
    float wiDotN = dot(wi, nrm);
    float woDotH = dot(wo, h);
    float nDotH = dot(nrm, h);

    float D = fwdNdfGGX(alphaSqr, nDotH);
    float G = fwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN);
    vec3f F = fwdFresnelSchlick(col, 1.0f, woDotH);
    vec3f w = F * D * G * 0.25 / woDotN;

    bool frontfacing = (woDotN > SPECULAR_EPSILON) & (wiDotN > SPECULAR_EPSILON);
    return frontfacing ? w : 0.0f;
}

// Recomputes the forward pass, then reverses it; gradients are only
// produced for frontfacing configurations (forward returned 0 otherwise).
__device__ void bwdPbrSpecular(
    const vec3f col, const vec3f nrm, const vec3f wo, const vec3f wi, const float alpha, const float min_roughness,
    vec3f& d_col, vec3f& d_nrm, vec3f& d_wo, vec3f& d_wi, float& d_alpha, const vec3f d_out)
{
    ///////////////////////////////////////////////////////////////////////
    // FWD eval

    float _alpha = clamp(alpha, min_roughness * min_roughness, 1.0f);
    float alphaSqr = _alpha * _alpha;

    vec3f h = safeNormalize(wo + wi);
    float woDotN = dot(wo, nrm);
    float wiDotN = dot(wi, nrm);
    float woDotH = dot(wo, h);
    float nDotH = dot(nrm, h);

    float D = fwdNdfGGX(alphaSqr, nDotH);
    float G = fwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN);
    vec3f F = fwdFresnelSchlick(col, 1.0f, woDotH);
    vec3f w = F * D * G * 0.25 / woDotN;
    bool frontfacing = (woDotN > SPECULAR_EPSILON) & (wiDotN > SPECULAR_EPSILON);

    if (frontfacing)
    {
        ///////////////////////////////////////////////////////////////////////
        // BWD eval

        // Gradients of w = F * D * G * 0.25 / woDotN w.r.t. each factor;
        // scalar gradients sum over the three color channels.
        vec3f d_F = d_out * D * G * 0.25f / woDotN;
        float d_D = sum(d_out * F * G * 0.25f / woDotN);
        float d_G = sum(d_out * F * D * 0.25f / woDotN);

        float d_woDotN = -sum(d_out * F * D * G * 0.25f / (woDotN * woDotN));

        // d_woDotN / d_wiDotN keep accumulating across every use of those
        // dot products below before being pushed into the vectors.
        vec3f d_f90(0);
        float d_woDotH(0), d_wiDotN(0), d_nDotH(0), d_alphaSqr(0);
        bwdFresnelSchlick(col, 1.0f, woDotH, d_col, d_f90, d_woDotH, d_F);
        bwdMaskingSmithGGXCorrelated(alphaSqr, woDotN, wiDotN, d_alphaSqr, d_woDotN, d_wiDotN, d_G);
        bwdNdfGGX(alphaSqr, nDotH, d_alphaSqr, d_nDotH, d_D);

        vec3f d_h(0);
        bwdDot(nrm, h, d_nrm, d_h, d_nDotH);
        bwdDot(wo, h, d_wo, d_h, d_woDotH);
        bwdDot(wi, nrm, d_wi, d_nrm, d_wiDotN);
        bwdDot(wo, nrm, d_wo, d_nrm, d_woDotN);

        vec3f d_h_unnorm(0);
        bwdSafeNormalize(wo + wi, d_h_unnorm, d_h);
        d_wo += d_h_unnorm;
        d_wi += d_h_unnorm;

        // Clamp backward: no alpha gradient when the clamp saturated low.
        if (alpha > min_roughness * min_roughness)
            d_alpha += d_alphaSqr * 2 * alpha;
    }
}
//------------------------------------------------------------------------
// Full PBR BSDF
// Forward full PBR BSDF: diffuse (Lambert or Frostbite, selected by BSDF)
// plus GGX specular, with the standard metalness workflow split of kd.
__device__ vec3f fwdPbrBSDF(const vec3f kd, const vec3f arm, const vec3f pos, const vec3f nrm, const vec3f view_pos, const vec3f light_pos, const float min_roughness, int BSDF)
{
    // Directions from the shading point to the eye and to the light.
    vec3f wo = safeNormalize(view_pos - pos);
    vec3f wi = safeNormalize(light_pos - pos);

    // arm = (AO, roughness, metalness); alpha = perceptual roughness squared.
    float alpha = arm.y * arm.y;
    vec3f spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x);
    vec3f diff_col = kd * (1.0f - arm.z);

    float diff = (BSDF == 0) ? fwdLambert(nrm, wi)
                             : fwdFrostbiteDiffuse(nrm, wi, wo, arm.y);

    return diff_col * diff + fwdPbrSpecular(spec_col, nrm, wo, wi, alpha, min_roughness);
}
// Backward pass of fwdPbrBSDF. Re-runs the cheap parts of the forward pass,
// then back-propagates d_out through the specular lobe, the diffuse term and
// the material parameter derivations. Gradients are accumulated (+=/-=).
__device__ void bwdPbrBSDF(
const vec3f kd, const vec3f arm, const vec3f pos, const vec3f nrm, const vec3f view_pos, const vec3f light_pos, const float min_roughness, int BSDF,
vec3f& d_kd, vec3f& d_arm, vec3f& d_pos, vec3f& d_nrm, vec3f& d_view_pos, vec3f& d_light_pos, const vec3f d_out)
{
////////////////////////////////////////////////////////////////////////
// FWD
vec3f _wi = light_pos - pos;
vec3f _wo = view_pos - pos;
vec3f wi = safeNormalize(_wi);
vec3f wo = safeNormalize(_wo);
float alpha = arm.y * arm.y;
vec3f spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x);
vec3f diff_col = kd * (1.0f - arm.z);
float diff = 0.0f;
if (BSDF == 0)
diff = fwdLambert(nrm, wi);
else
diff = fwdFrostbiteDiffuse(nrm, wi, wo, arm.y);
////////////////////////////////////////////////////////////////////////
// BWD
float d_alpha(0);
vec3f d_spec_col(0), d_wi(0), d_wo(0);
bwdPbrSpecular(spec_col, nrm, wo, wi, alpha, min_roughness, d_spec_col, d_nrm, d_wo, d_wi, d_alpha, d_out);
// diffuse = diff_col * diff; gradient w.r.t. the scalar diff first.
float d_diff = sum(diff_col * d_out);
if (BSDF == 0)
bwdLambert(nrm, wi, d_nrm, d_wi, d_diff);
else
bwdFrostbiteDiffuse(nrm, wi, wo, arm.y, d_nrm, d_wi, d_wo, d_arm.y, d_diff);
// Backprop: diff_col = kd * (1.0f - arm.z)
vec3f d_diff_col = d_out * diff;
d_kd += d_diff_col * (1.0f - arm.z);
d_arm.z -= sum(d_diff_col * kd);
// Backprop: spec_col = (0.04f * (1.0f - arm.z) + kd * arm.z) * (1.0 - arm.x)
d_kd -= d_spec_col * (arm.x - 1.0f) * arm.z;
d_arm.x += sum(d_spec_col * (arm.z * (0.04f - kd) - 0.04f));
d_arm.z -= sum(d_spec_col * (kd - 0.04f) * (arm.x - 1.0f));
// Backprop: alpha = arm.y * arm.y
d_arm.y += d_alpha * 2 * arm.y;
// Backprop: vec3f wi = safeNormalize(light_pos - pos);
vec3f d__wi(0);
bwdSafeNormalize(_wi, d__wi, d_wi);
d_light_pos += d__wi;
d_pos -= d__wi;
// Backprop: vec3f wo = safeNormalize(view_pos - pos);
vec3f d__wo(0);
bwdSafeNormalize(_wo, d__wo, d_wo);
d_view_pos += d__wo;
d_pos -= d__wo;
}
//------------------------------------------------------------------------
// Kernels
// Forward Lambert kernel: one thread per texel, z indexes the minibatch layer.
__global__ void LambertFwdKernel(LambertKernelParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    p.out.store(ix, iy, iz, fwdLambert(p.nrm.fetch3(ix, iy, iz), p.wi.fetch3(ix, iy, iz)));
}
// Backward Lambert kernel: reads the upstream gradient from p.out and
// scatters gradients into the normal and light-direction tensors.
__global__ void LambertBwdKernel(LambertKernelParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    vec3f d_nrm(0), d_wi(0);
    bwdLambert(p.nrm.fetch3(ix, iy, iz), p.wi.fetch3(ix, iy, iz), d_nrm, d_wi, p.out.fetch1(ix, iy, iz));
    p.nrm.store_grad(ix, iy, iz, d_nrm);
    p.wi.store_grad(ix, iy, iz, d_wi);
}
// Forward Frostbite diffuse kernel: one thread per texel.
__global__ void FrostbiteDiffuseFwdKernel(FrostbiteDiffuseKernelParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    const vec3f nrm = p.nrm.fetch3(ix, iy, iz);
    const vec3f wi = p.wi.fetch3(ix, iy, iz);
    const vec3f wo = p.wo.fetch3(ix, iy, iz);
    const float linearRoughness = p.linearRoughness.fetch1(ix, iy, iz);
    p.out.store(ix, iy, iz, fwdFrostbiteDiffuse(nrm, wi, wo, linearRoughness));
}
// Backward Frostbite diffuse kernel: p.out holds the upstream gradient;
// parameter gradients are written back via store_grad.
__global__ void FrostbiteDiffuseBwdKernel(FrostbiteDiffuseKernelParams p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
vec3f nrm = p.nrm.fetch3(px, py, pz);
vec3f wi = p.wi.fetch3(px, py, pz);
vec3f wo = p.wo.fetch3(px, py, pz);
float linearRoughness = p.linearRoughness.fetch1(px, py, pz);
float d_out = p.out.fetch1(px, py, pz);
float d_linearRoughness = 0.0f;
vec3f d_nrm(0), d_wi(0), d_wo(0);
bwdFrostbiteDiffuse(nrm, wi, wo, linearRoughness, d_nrm, d_wi, d_wo, d_linearRoughness, d_out);
p.nrm.store_grad(px, py, pz, d_nrm);
p.wi.store_grad(px, py, pz, d_wi);
p.wo.store_grad(px, py, pz, d_wo);
p.linearRoughness.store_grad(px, py, pz, d_linearRoughness);
}
// Forward Schlick Fresnel kernel: one thread per texel.
__global__ void FresnelShlickFwdKernel(FresnelShlickKernelParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    const vec3f f0 = p.f0.fetch3(ix, iy, iz);
    const vec3f f90 = p.f90.fetch3(ix, iy, iz);
    const float cosTheta = p.cosTheta.fetch1(ix, iy, iz);
    p.out.store(ix, iy, iz, fwdFresnelSchlick(f0, f90, cosTheta));
}
// Backward Schlick Fresnel kernel: p.out holds the upstream (vec3) gradient.
__global__ void FresnelShlickBwdKernel(FresnelShlickKernelParams p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
vec3f f0 = p.f0.fetch3(px, py, pz);
vec3f f90 = p.f90.fetch3(px, py, pz);
float cosTheta = p.cosTheta.fetch1(px, py, pz);
vec3f d_out = p.out.fetch3(px, py, pz);
vec3f d_f0(0), d_f90(0);
float d_cosTheta(0);
bwdFresnelSchlick(f0, f90, cosTheta, d_f0, d_f90, d_cosTheta, d_out);
p.f0.store_grad(px, py, pz, d_f0);
p.f90.store_grad(px, py, pz, d_f90);
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
}
// Forward GGX NDF kernel: one thread per texel.
__global__ void ndfGGXFwdKernel(NdfGGXParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    p.out.store(ix, iy, iz, fwdNdfGGX(p.alphaSqr.fetch1(ix, iy, iz), p.cosTheta.fetch1(ix, iy, iz)));
}
// Backward GGX NDF kernel: p.out holds the upstream scalar gradient.
__global__ void ndfGGXBwdKernel(NdfGGXParams p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
float cosTheta = p.cosTheta.fetch1(px, py, pz);
float d_out = p.out.fetch1(px, py, pz);
float d_alphaSqr(0), d_cosTheta(0);
bwdNdfGGX(alphaSqr, cosTheta, d_alphaSqr, d_cosTheta, d_out);
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
}
// Forward Smith lambda (GGX) kernel: one thread per texel.
__global__ void lambdaGGXFwdKernel(NdfGGXParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    p.out.store(ix, iy, iz, fwdLambdaGGX(p.alphaSqr.fetch1(ix, iy, iz), p.cosTheta.fetch1(ix, iy, iz)));
}
// Backward Smith lambda (GGX) kernel: p.out holds the upstream scalar gradient.
__global__ void lambdaGGXBwdKernel(NdfGGXParams p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
float cosTheta = p.cosTheta.fetch1(px, py, pz);
float d_out = p.out.fetch1(px, py, pz);
float d_alphaSqr(0), d_cosTheta(0);
bwdLambdaGGX(alphaSqr, cosTheta, d_alphaSqr, d_cosTheta, d_out);
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
p.cosTheta.store_grad(px, py, pz, d_cosTheta);
}
// Forward correlated Smith masking kernel: one thread per texel.
__global__ void maskingSmithFwdKernel(MaskingSmithParams p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    const float alphaSqr = p.alphaSqr.fetch1(ix, iy, iz);
    const float cosThetaI = p.cosThetaI.fetch1(ix, iy, iz);
    const float cosThetaO = p.cosThetaO.fetch1(ix, iy, iz);
    p.out.store(ix, iy, iz, fwdMaskingSmithGGXCorrelated(alphaSqr, cosThetaI, cosThetaO));
}
// Backward correlated Smith masking kernel: p.out holds the upstream gradient.
__global__ void maskingSmithBwdKernel(MaskingSmithParams p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
float alphaSqr = p.alphaSqr.fetch1(px, py, pz);
float cosThetaI = p.cosThetaI.fetch1(px, py, pz);
float cosThetaO = p.cosThetaO.fetch1(px, py, pz);
float d_out = p.out.fetch1(px, py, pz);
float d_alphaSqr(0), d_cosThetaI(0), d_cosThetaO(0);
bwdMaskingSmithGGXCorrelated(alphaSqr, cosThetaI, cosThetaO, d_alphaSqr, d_cosThetaI, d_cosThetaO, d_out);
p.alphaSqr.store_grad(px, py, pz, d_alphaSqr);
p.cosThetaI.store_grad(px, py, pz, d_cosThetaI);
p.cosThetaO.store_grad(px, py, pz, d_cosThetaO);
}
// Forward GGX specular kernel: one thread per texel.
__global__ void pbrSpecularFwdKernel(PbrSpecular p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    const vec3f col = p.col.fetch3(ix, iy, iz);
    const vec3f nrm = p.nrm.fetch3(ix, iy, iz);
    const vec3f wo = p.wo.fetch3(ix, iy, iz);
    const vec3f wi = p.wi.fetch3(ix, iy, iz);
    const float alpha = p.alpha.fetch1(ix, iy, iz);
    p.out.store(ix, iy, iz, fwdPbrSpecular(col, nrm, wo, wi, alpha, p.min_roughness));
}
// Backward GGX specular kernel: p.out holds the upstream (vec3) gradient.
__global__ void pbrSpecularBwdKernel(PbrSpecular p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
vec3f col = p.col.fetch3(px, py, pz);
vec3f nrm = p.nrm.fetch3(px, py, pz);
vec3f wo = p.wo.fetch3(px, py, pz);
vec3f wi = p.wi.fetch3(px, py, pz);
float alpha = p.alpha.fetch1(px, py, pz);
vec3f d_out = p.out.fetch3(px, py, pz);
float d_alpha(0);
vec3f d_col(0), d_nrm(0), d_wo(0), d_wi(0);
bwdPbrSpecular(col, nrm, wo, wi, alpha, p.min_roughness, d_col, d_nrm, d_wo, d_wi, d_alpha, d_out);
p.col.store_grad(px, py, pz, d_col);
p.nrm.store_grad(px, py, pz, d_nrm);
p.wo.store_grad(px, py, pz, d_wo);
p.wi.store_grad(px, py, pz, d_wi);
p.alpha.store_grad(px, py, pz, d_alpha);
}
// Forward full-BSDF kernel: one thread per texel.
__global__ void pbrBSDFFwdKernel(PbrBSDF p)
{
    const unsigned int ix = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int iy = blockIdx.y * blockDim.y + threadIdx.y;
    const unsigned int iz = blockIdx.z;
    if (ix >= p.gridSize.x || iy >= p.gridSize.y || iz >= p.gridSize.z)
        return;

    const vec3f kd = p.kd.fetch3(ix, iy, iz);
    const vec3f arm = p.arm.fetch3(ix, iy, iz);
    const vec3f pos = p.pos.fetch3(ix, iy, iz);
    const vec3f nrm = p.nrm.fetch3(ix, iy, iz);
    const vec3f view_pos = p.view_pos.fetch3(ix, iy, iz);
    const vec3f light_pos = p.light_pos.fetch3(ix, iy, iz);
    p.out.store(ix, iy, iz, fwdPbrBSDF(kd, arm, pos, nrm, view_pos, light_pos, p.min_roughness, p.BSDF));
}
// Backward full-BSDF kernel: p.out holds the upstream (vec3) gradient; the
// gradients for all six input tensors are written back via store_grad.
__global__ void pbrBSDFBwdKernel(PbrBSDF p)
{
// Calculate pixel position.
unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
unsigned int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
vec3f kd = p.kd.fetch3(px, py, pz);
vec3f arm = p.arm.fetch3(px, py, pz);
vec3f pos = p.pos.fetch3(px, py, pz);
vec3f nrm = p.nrm.fetch3(px, py, pz);
vec3f view_pos = p.view_pos.fetch3(px, py, pz);
vec3f light_pos = p.light_pos.fetch3(px, py, pz);
vec3f d_out = p.out.fetch3(px, py, pz);
vec3f d_kd(0), d_arm(0), d_pos(0), d_nrm(0), d_view_pos(0), d_light_pos(0);
bwdPbrBSDF(kd, arm, pos, nrm, view_pos, light_pos, p.min_roughness, p.BSDF, d_kd, d_arm, d_pos, d_nrm, d_view_pos, d_light_pos, d_out);
p.kd.store_grad(px, py, pz, d_kd);
p.arm.store_grad(px, py, pz, d_arm);
p.pos.store_grad(px, py, pz, d_pos);
p.nrm.store_grad(px, py, pz, d_nrm);
p.view_pos.store_grad(px, py, pz, d_view_pos);
p.light_pos.store_grad(px, py, pz, d_light_pos);
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "common.h"
// Inputs/outputs for the Lambert diffuse kernels.
struct LambertKernelParams
{
Tensor nrm; // shading normals
Tensor wi; // light directions
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
};
// Inputs/outputs for the Frostbite diffuse kernels.
struct FrostbiteDiffuseKernelParams
{
Tensor nrm; // shading normals
Tensor wi; // light directions
Tensor wo; // view directions
Tensor linearRoughness; // perceptual (linear) roughness
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
};
// Inputs/outputs for the Schlick Fresnel kernels.
struct FresnelShlickKernelParams
{
Tensor f0; // reflectance at normal incidence
Tensor f90; // reflectance at grazing incidence
Tensor cosTheta; // cosine of the incidence angle
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
};
// Inputs/outputs for the GGX NDF and Smith-lambda kernels (shared layout).
struct NdfGGXParams
{
Tensor alphaSqr; // squared GGX roughness
Tensor cosTheta;
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
};
// Inputs/outputs for the correlated Smith masking kernels.
struct MaskingSmithParams
{
Tensor alphaSqr; // squared GGX roughness
Tensor cosThetaI; // incident cosine
Tensor cosThetaO; // outgoing cosine
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
};
// Inputs/outputs for the GGX specular kernels.
struct PbrSpecular
{
Tensor col; // specular color (F0)
Tensor nrm; // shading normals
Tensor wo; // view directions
Tensor wi; // light directions
Tensor alpha; // GGX roughness
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
float min_roughness; // lower clamp applied to alpha inside the kernel
};
// Inputs/outputs for the full PBR BSDF kernels.
struct PbrBSDF
{
Tensor kd; // diffuse albedo
Tensor arm; // packed (AO, roughness, metalness)
Tensor pos; // world-space shading positions
Tensor nrm; // shading normals
Tensor view_pos; // camera positions
Tensor light_pos; // light positions
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, batch)
float min_roughness; // lower clamp applied to the derived alpha
int BSDF; // diffuse model selector: 0 = Lambert, otherwise Frostbite
};

View File

@@ -0,0 +1,74 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <cuda_runtime.h>
#include <algorithm>
//------------------------------------------------------------------------
// Block and grid size calculators for kernel launches.
// Chooses a 2D CUDA block size (z is always 1) of at most maxWidth * maxHeight
// threads, shrinking to powers of two so the block fits buffers that are
// narrower or shorter than the default block shape.
dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims)
{
int maxThreads = maxWidth * maxHeight;
if (maxThreads <= 1 || (dims.x * dims.y) <= 1)
return dim3(1, 1, 1); // Degenerate.
// Start from max size.
int bw = maxWidth;
int bh = maxHeight;
// Optimizations for weirdly sized buffers.
if (dims.x < bw)
{
// Decrease block width to smallest power of two that covers the buffer width.
while ((bw >> 1) >= dims.x)
bw >>= 1;
// Maximize height.
bh = maxThreads / bw;
if (bh > dims.y)
bh = dims.y;
}
else if (dims.y < bh)
{
// Halve height and double width until fits completely inside buffer vertically.
while (bh > dims.y)
{
bh >>= 1;
if (bw < dims.x)
bw <<= 1;
}
}
// Done.
return dim3(bw, bh, 1);
}
// Returns the shape of a sub-block that can be reduced with horizontal SIMD
// operations (e.g. __shfl_xor_sync): at most 32 lanes total, filled along x
// first, then y, then z.
dim3 getWarpSize(dim3 blockSize)
{
    const unsigned int wx = std::min(blockSize.x, 32u);
    const unsigned int wy = std::min(std::max(32u / blockSize.x, 1u), std::min(32u, blockSize.y));
    const unsigned int wz = std::min(std::max(32u / (blockSize.x * blockSize.y), 1u), std::min(32u, blockSize.z));
    return dim3(wx, wy, wz);
}
// Number of blocks needed to cover dims: per-component ceil(dims / blockSize),
// written as (n - 1) / b + 1 (assumes every component of dims is >= 1; the
// components are unsigned, so 0 would underflow).
dim3 getLaunchGridSize(dim3 blockSize, dim3 dims)
{
    return dim3((dims.x - 1) / blockSize.x + 1,
                (dims.y - 1) / blockSize.y + 1,
                (dims.z - 1) / blockSize.z + 1);
}
//------------------------------------------------------------------------

View File

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include <cuda.h>
#include <stdint.h>
#include "vec3f.h"
#include "vec4f.h"
#include "tensor.h"
dim3 getLaunchBlockSize(int maxWidth, int maxHeight, dim3 dims);
dim3 getLaunchGridSize(dim3 blockSize, dim3 dims);
#ifdef __CUDACC__
#ifdef _MSC_VER
#define M_PI 3.14159265358979323846f
#endif
// Device/host variant of getWarpSize (see common.cpp): largest sub-block of at
// most 32 lanes, filled along x first, then y, then z.
__host__ __device__ static inline dim3 getWarpSize(dim3 blockSize)
{
return dim3(
min(blockSize.x, 32u),
min(max(32u / blockSize.x, 1u), min(32u, blockSize.y)),
min(max(32u / (blockSize.x * blockSize.y), 1u), min(32u, blockSize.z))
);
}
// Scalar clamp of val into [mn, mx].
__device__ static inline float clamp(float val, float mn, float mx) { return min(max(val, mn), mx); }
#else
dim3 getWarpSize(dim3 blockSize);
#endif

View File

@@ -0,0 +1,350 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "common.h"
#include "cubemap.h"
#include <float.h>
// https://cgvr.cs.uni-bremen.de/teaching/cg_literatur/Spherical,%20Cubic,%20and%20Parabolic%20Environment%20Mappings.pdf
// Solid-angle weight of cubemap texel (x, y) on an N x N face, computed as the
// product of arctangent differences across the texel edges (see the mapping
// reference linked above).
__device__ float pixel_area(int x, int y, int N)
{
    if (N <= 1)
        return 1;

    // Mirror the texel into the first quadrant around the face center.
    const int H = N / 2;
    const int ax = abs(x - H);
    const int ay = abs(y - H);
    const float dx = atan((float)(ax + 1) / (float)H) - atan((float)ax / (float)H);
    const float dy = atan((float)(ay + 1) / (float)H) - atan((float)ay / (float)H);
    return dx * dy;
}
// Maps texel (x, y) on cubemap face `side` (N x N texels) to a unit direction.
// Texel centers are sampled (the +0.5f), remapped to [-1, 1] face coordinates.
__device__ vec3f cube_to_dir(int x, int y, int side, int N)
{
float fx = 2.0f * (((float)x + 0.5f) / (float)N) - 1.0f;
float fy = 2.0f * (((float)y + 0.5f) / (float)N) - 1.0f;
switch (side)
{
case 0: return safeNormalize(vec3f(1, -fy, -fx));
case 1: return safeNormalize(vec3f(-1, -fy, fx));
case 2: return safeNormalize(vec3f(fx, 1, fy));
case 3: return safeNormalize(vec3f(fx, -1, -fy));
case 4: return safeNormalize(vec3f(fx, -fy, 1));
case 5: return safeNormalize(vec3f(-fx, -fy, -1));
}
return vec3f(0,0,0); // Unreachable
}
// Rotates direction v into the local frame of cubemap face `side`, so that the
// face lies on the z = 1 plane (used by dir_extents for cone bounds).
__device__ vec3f dir_to_side(int side, vec3f v)
{
switch (side)
{
case 0: return vec3f(-v.z, -v.y, v.x);
case 1: return vec3f( v.z, -v.y, -v.x);
case 2: return vec3f( v.x, v.z, v.y);
case 3: return vec3f( v.x, -v.z, -v.y);
case 4: return vec3f( v.x, -v.y, v.z);
case 5: return vec3f(-v.x, -v.y, -v.z);
}
return vec3f(0,0,0); // Unreachable
}
// 1D cross-section of a cone of half-angle theta around direction (x, z):
// writes the projected extents [_min, _max] as x/z slopes on the z = 1 face
// plane. If a rotated edge direction falls behind the plane (z near/below 0),
// that extent is unbounded and is clamped to +/-FLT_MAX.
__device__ void extents_1d(float x, float z, float theta, float& _min, float& _max)
{
    // Hoisted: tan(theta) * length was previously evaluated four times.
    float l = sqrtf(x * x + z * z);
    float tl = tan(theta) * l;
    // Rotate (x, z) by +/- theta (scaled small-angle style rotation).
    float pxr = x + z * tl, pzr = z - x * tl;
    float pxl = x - z * tl, pzl = z + x * tl;
    if (pzl <= 0.00001f)
        _min = pxl > 0.0f ? FLT_MAX : -FLT_MAX;
    else
        _min = pxl / pzl;
    if (pzr <= 0.00001f)
        _max = pxr > 0.0f ? FLT_MAX : -FLT_MAX;
    else
        _max = pxr / pzr;
}
// Computes the texel-space bounding box [_xmin.._xmax] x [_ymin.._ymax] of a
// cone of half-angle theta around direction v on cubemap face `side` (N x N).
// Writes (-1, -1, -1, -1) when the cone misses the face entirely.
__device__ void dir_extents(int side, int N, vec3f v, float theta, int &_xmin, int& _xmax, int& _ymin, int& _ymax)
{
    vec3f c = dir_to_side(side, v); // remap to (x,y,z) where side is at z = 1
    if (theta < 0.785398f) // PI/4
    {
        float xmin, xmax, ymin, ymax;
        extents_1d(c.x, c.z, theta, xmin, xmax);
        extents_1d(c.y, c.z, theta, ymin, ymax);
        if (xmin > 1.0f || xmax < -1.0f || ymin > 1.0f || ymax < -1.0f)
        {
            _xmin = -1; _xmax = -1; _ymin = -1; _ymax = -1; // Bad aabb
        }
        else
        {
            // Map [-1, 1] face coordinates to clamped texel indices.
            _xmin = (int)min(max((xmin + 1.0f) * (0.5f * (float)N), 0.0f), (float)(N - 1));
            _xmax = (int)min(max((xmax + 1.0f) * (0.5f * (float)N), 0.0f), (float)(N - 1));
            _ymin = (int)min(max((ymin + 1.0f) * (0.5f * (float)N), 0.0f), (float)(N - 1));
            _ymax = (int)min(max((ymax + 1.0f) * (0.5f * (float)N), 0.0f), (float)(N - 1));
        }
    }
    else
    {
        // Cone too wide (>= 45 degrees): conservatively cover the whole face.
        // Fix: the outputs are int references; the original assigned float
        // literals (0.0f / (float)(N-1)) through implicit conversions.
        _xmin = 0;
        _xmax = N - 1;
        _ymin = 0;
        _ymax = N - 1;
    }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Diffuse kernel
// Forward diffuse irradiance kernel: brute-force cosine-weighted sum of every
// cubemap texel for the output direction handled by this thread.
__global__ void DiffuseCubemapFwdKernel(DiffuseCubemapKernelParams p)
{
    const int ox = blockIdx.x * blockDim.x + threadIdx.x;
    const int oy = blockIdx.y * blockDim.y + threadIdx.y;
    const int face = blockIdx.z;
    if (ox >= p.gridSize.x || oy >= p.gridSize.y || face >= p.gridSize.z)
        return;

    const int Npx = p.cubemap.dims[1];
    const vec3f N = cube_to_dir(ox, oy, face, Npx);

    vec3f accum(0);
    for (int s = 0; s < p.cubemap.dims[0]; ++s)
        for (int y = 0; y < Npx; ++y)
            for (int x = 0; x < Npx; ++x)
            {
                const vec3f L = cube_to_dir(x, y, s, Npx);
                const float costheta = min(max(dot(N, L), 0.0f), 0.999f);
                const float w = costheta * pixel_area(x, y, Npx) / 3.141592f; // pi = area of positive hemisphere
                accum += p.cubemap.fetch3(x, y, s) * w;
            }

    p.out.store(ox, oy, face, accum);
}
// Backward diffuse irradiance kernel: scatters the upstream gradient (read
// from p.out) back into every cubemap texel with the same cosine weights as
// the forward pass. atomicAdd is required because many output directions
// touch the same cubemap texel.
__global__ void DiffuseCubemapBwdKernel(DiffuseCubemapKernelParams p)
{
// Calculate pixel position.
int px = blockIdx.x * blockDim.x + threadIdx.x;
int py = blockIdx.y * blockDim.y + threadIdx.y;
int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
int Npx = p.cubemap.dims[1];
vec3f N = cube_to_dir(px, py, pz, Npx);
vec3f grad = p.out.fetch3(px, py, pz);
for (int s = 0; s < p.cubemap.dims[0]; ++s)
{
for (int y = 0; y < Npx; ++y)
{
for (int x = 0; x < Npx; ++x)
{
vec3f L = cube_to_dir(x, y, s, Npx);
float costheta = min(max(dot(N, L), 0.0f), 0.999f);
float w = costheta * pixel_area(x, y, Npx) / 3.141592f; // pi = area of positive hemisphere
atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 0), grad.x * w);
atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 1), grad.y * w);
atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 2), grad.z * w);
}
}
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// GGX splitsum kernel
// GGX / Trowbridge-Reitz normal distribution function:
// D = alphaSqr / (pi * ((n.h)^2 * (alphaSqr - 1) + 1)^2), with n.h clamped.
__device__ inline float ndfGGX(const float alphaSqr, const float cosTheta)
{
    // Fix: use a float literal (0.0f); the original passed a double 0.0 into
    // the float clamp, forcing an implicit narrowing conversion.
    float _cosTheta = clamp(cosTheta, 0.0f, 1.0f);
    float d = (_cosTheta * alphaSqr - _cosTheta) * _cosTheta + 1.0f;
    return alphaSqr / (d * d * M_PI);
}
// For each output direction (GGX reflection cone axis VNR), computes per-face
// texel bounding boxes of all cubemap texels whose direction lies inside the
// cone (dot >= costheta_cutoff). Tiles are culled with interval arithmetic
// before per-texel testing. Results are 4 values per face packed into p.out.
__global__ void SpecularBoundsKernel(SpecularBoundsKernelParams p)
{
int px = blockIdx.x * blockDim.x + threadIdx.x;
int py = blockIdx.y * blockDim.y + threadIdx.y;
int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
int Npx = p.gridSize.x;
vec3f VNR = cube_to_dir(px, py, pz, Npx);
const int TILE_SIZE = 16;
// Brute force entire cubemap and compute bounds for the cone
for (int s = 0; s < p.gridSize.z; ++s)
{
// Assume empty BBox
int _min_x = p.gridSize.x - 1, _max_x = 0;
int _min_y = p.gridSize.y - 1, _max_y = 0;
// For each (16x16) tile
for (int tx = 0; tx < (p.gridSize.x + TILE_SIZE - 1) / TILE_SIZE; tx++)
{
for (int ty = 0; ty < (p.gridSize.y + TILE_SIZE - 1) / TILE_SIZE; ty++)
{
// Compute tile extents
int tsx = tx * TILE_SIZE, tsy = ty * TILE_SIZE;
int tex = min((tx + 1) * TILE_SIZE, p.gridSize.x), tey = min((ty + 1) * TILE_SIZE, p.gridSize.y);
// Use some blunt interval arithmetics to cull tiles
vec3f L0 = cube_to_dir(tsx, tsy, s, Npx), L1 = cube_to_dir(tex, tsy, s, Npx);
vec3f L2 = cube_to_dir(tsx, tey, s, Npx), L3 = cube_to_dir(tex, tey, s, Npx);
float minx = min(min(L0.x, L1.x), min(L2.x, L3.x)), maxx = max(max(L0.x, L1.x), max(L2.x, L3.x));
float miny = min(min(L0.y, L1.y), min(L2.y, L3.y)), maxy = max(max(L0.y, L1.y), max(L2.y, L3.y));
float minz = min(min(L0.z, L1.z), min(L2.z, L3.z)), maxz = max(max(L0.z, L1.z), max(L2.z, L3.z));
// Upper bound of dot(L, VNR) over the tile's direction interval.
float maxdp = max(minx * VNR.x, maxx * VNR.x) + max(miny * VNR.y, maxy * VNR.y) + max(minz * VNR.z, maxz * VNR.z);
if (maxdp >= p.costheta_cutoff)
{
// Test all pixels in tile.
for (int y = tsy; y < tey; ++y)
{
for (int x = tsx; x < tex; ++x)
{
vec3f L = cube_to_dir(x, y, s, Npx);
if (dot(L, VNR) >= p.costheta_cutoff)
{
_min_x = min(_min_x, x);
_max_x = max(_max_x, x);
_min_y = min(_min_y, y);
_max_y = max(_max_y, y);
}
}
}
}
}
}
// Pack the per-face bbox as 4 channels: xmin, xmax, ymin, ymax.
p.out.store(p.out._nhwcIndex(pz, py, px, s * 4 + 0), _min_x);
p.out.store(p.out._nhwcIndex(pz, py, px, s * 4 + 1), _max_x);
p.out.store(p.out._nhwcIndex(pz, py, px, s * 4 + 2), _min_y);
p.out.store(p.out._nhwcIndex(pz, py, px, s * 4 + 3), _max_y);
}
}
// Forward GGX split-sum prefilter kernel: accumulates NDF-weighted radiance
// over the texels inside the cone bounds computed by SpecularBoundsKernel.
// Stores the unnormalized color in channels 0-2 and the weight sum in
// channel 3 (normalization happens on the host side — TODO confirm).
__global__ void SpecularCubemapFwdKernel(SpecularCubemapKernelParams p)
{
// Calculate pixel position.
int px = blockIdx.x * blockDim.x + threadIdx.x;
int py = blockIdx.y * blockDim.y + threadIdx.y;
int pz = blockIdx.z;
if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
return;
int Npx = p.cubemap.dims[1];
vec3f VNR = cube_to_dir(px, py, pz, Npx);
float alpha = p.roughness * p.roughness;
float alphaSqr = alpha * alpha;
float wsum = 0.0f;
vec3f col(0);
for (int s = 0; s < p.cubemap.dims[0]; ++s)
{
// Per-face texel bounds precomputed by SpecularBoundsKernel.
int xmin, xmax, ymin, ymax;
xmin = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 0));
xmax = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 1));
ymin = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 2));
ymax = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 3));
if (xmin <= xmax)
{
for (int y = ymin; y <= ymax; ++y)
{
for (int x = xmin; x <= xmax; ++x)
{
vec3f L = cube_to_dir(x, y, s, Npx);
if (dot(L, VNR) >= p.costheta_cutoff)
{
vec3f H = safeNormalize(L + VNR);
float wiDotN = max(dot(L, VNR), 0.0f);
float VNRDotH = max(dot(VNR, H), 0.0f);
// NDF-weighted solid-angle contribution of this texel.
float w = wiDotN * ndfGGX(alphaSqr, VNRDotH) * pixel_area(x, y, Npx) / 4.0f;
col += p.cubemap.fetch3(x, y, s) * w;
wsum += w;
}
}
}
}
}
p.out.store(p.out._nhwcIndex(pz, py, px, 0), col.x);
p.out.store(p.out._nhwcIndex(pz, py, px, 1), col.y);
p.out.store(p.out._nhwcIndex(pz, py, px, 2), col.z);
p.out.store(p.out._nhwcIndex(pz, py, px, 3), wsum);
}
// Backward GGX split-sum prefilter kernel: recomputes the forward weights and
// scatters the upstream gradient (read from p.out) into the cubemap texels
// inside the precomputed cone bounds. atomicAdd is required because bounds of
// neighboring output texels overlap.
// Fix: removed the unused local `vec3f col(0);` left over from the fwd kernel.
__global__ void SpecularCubemapBwdKernel(SpecularCubemapKernelParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * blockDim.x + threadIdx.x;
    int py = blockIdx.y * blockDim.y + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    int Npx = p.cubemap.dims[1];
    vec3f VNR = cube_to_dir(px, py, pz, Npx);
    vec3f grad = p.out.fetch3(px, py, pz);
    float alpha = p.roughness * p.roughness;
    float alphaSqr = alpha * alpha;

    for (int s = 0; s < p.cubemap.dims[0]; ++s)
    {
        // Per-face texel bounds precomputed by SpecularBoundsKernel.
        int xmin, xmax, ymin, ymax;
        xmin = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 0));
        xmax = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 1));
        ymin = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 2));
        ymax = (int)p.bounds.fetch(p.bounds._nhwcIndex(pz, py, px, s * 4 + 3));
        if (xmin <= xmax)
        {
            for (int y = ymin; y <= ymax; ++y)
            {
                for (int x = xmin; x <= xmax; ++x)
                {
                    vec3f L = cube_to_dir(x, y, s, Npx);
                    if (dot(L, VNR) >= p.costheta_cutoff)
                    {
                        vec3f H = safeNormalize(L + VNR);
                        float wiDotN = max(dot(L, VNR), 0.0f);
                        float VNRDotH = max(dot(VNR, H), 0.0f);
                        // Same weight as the forward pass.
                        float w = wiDotN * ndfGGX(alphaSqr, VNRDotH) * pixel_area(x, y, Npx) / 4.0f;
                        atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 0), grad.x * w);
                        atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 1), grad.y * w);
                        atomicAdd((float*)p.cubemap.d_val + p.cubemap.nhwcIndexContinuous(s, y, x, 2), grad.z * w);
                    }
                }
            }
        }
    }
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "common.h"
// Inputs/outputs for the diffuse irradiance cubemap kernels.
struct DiffuseCubemapKernelParams
{
Tensor cubemap; // input environment cubemap (faces, H, W, 3)
Tensor out; // forward result; the bwd kernel reads the upstream gradient here
dim3 gridSize; // launch extents (width, height, faces)
};
// Inputs/outputs for the GGX split-sum prefilter kernels.
struct SpecularCubemapKernelParams
{
Tensor cubemap; // input environment cubemap
Tensor bounds; // per-face cone bounds from SpecularBoundsKernel (4 per face)
Tensor out; // forward result (rgb + weight sum) / upstream gradient in bwd
dim3 gridSize; // launch extents (width, height, faces)
float costheta_cutoff; // cone cutoff: texels with dot(L, VNR) below are skipped
float roughness; // perceptual roughness of this mip level
};
// Inputs/outputs for the specular cone-bounds kernel.
struct SpecularBoundsKernelParams
{
float costheta_cutoff; // cone cutoff used to include texels in the bounds
Tensor out; // packed per-face bbox (xmin, xmax, ymin, ymax)
dim3 gridSize; // launch extents (width, height, faces)
};

View File

@@ -0,0 +1,210 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <cuda.h>
#include "common.h"
#include "loss.h"
//------------------------------------------------------------------------
// Utils
// Derivative of |x|: sign(x), using the subgradient convention d|0| = 0.
__device__ inline float bwdAbs(float x) { return x == 0.0f ? 0.0f : x < 0.0f ? -1.0f : 1.0f; }
// Butterfly reduction: sums val across all 32 lanes of the current warp;
// every lane returns the full total. Requires a fully active warp
// (mask 0xFFFFFFFF).
__device__ float warpSum(float val) {
for (int i = 1; i < 32; i *= 2)
val += __shfl_xor_sync(0xFFFFFFFF, val, i);
return val;
}
//------------------------------------------------------------------------
// Tonemapping
// Forward linear -> sRGB transfer function for a single channel.
__device__ inline float fwdSRGB(float x)
{
    if (x > 0.0031308f)
        return powf(max(x, 0.0031308f), 1.0f / 2.4f) * 1.055f - 0.055f;
    return 12.92f * max(x, 0.0f);
}
// Backward of fwdSRGB: accumulates d(srgb(x))/dx * d_out into d_x.
// 0.439583 = 1.055 / 2.4 and 0.583333 = 1 - 1/2.4 (precomputed derivative
// constants of the gamma branch). Gradient is zero for x <= 0, where the
// forward pass is the constant 12.92 * max(x, 0) = 0.
__device__ inline void bwdSRGB(float x, float &d_x, float d_out)
{
    if (x > 0.0031308f)
        d_x += d_out * 0.439583f / powf(x, 0.583333f);
    else if (x > 0.0f)
        d_x += d_out * 12.92f;
}
// HDR tonemap: log(1 + x) range compression followed by the sRGB curve,
// applied per channel.
__device__ inline vec3f fwdTonemapLogSRGB(vec3f x)
{
    float r = fwdSRGB(logf(x.x + 1.0f));
    float g = fwdSRGB(logf(x.y + 1.0f));
    float b = fwdSRGB(logf(x.z + 1.0f));
    return vec3f(r, g, b);
}
// Backward of fwdTonemapLogSRGB: chain rule through sRGB then log(1 + x)
// (the 1 / (x + 1) factor). Gradients are zeroed outside (0, 65535), matching
// the clamp applied by the forward loss kernel.
__device__ inline void bwdTonemapLogSRGB(vec3f x, vec3f& d_x, vec3f d_out)
{
    if (x.x > 0.0f && x.x < 65535.0f)
    {
        bwdSRGB(logf(x.x + 1.0f), d_x.x, d_out.x);
        d_x.x *= 1 / (x.x + 1.0f);
    }
    if (x.y > 0.0f && x.y < 65535.0f)
    {
        bwdSRGB(logf(x.y + 1.0f), d_x.y, d_out.y);
        d_x.y *= 1 / (x.y + 1.0f);
    }
    if (x.z > 0.0f && x.z < 65535.0f)
    {
        bwdSRGB(logf(x.z + 1.0f), d_x.z, d_out.z);
        d_x.z *= 1 / (x.z + 1.0f);
    }
}
// Relative MSE: squared error normalized by the squared signal magnitudes.
__device__ inline float fwdRELMSE(float img, float target, float eps = 0.1f)
{
    float diff = img - target;
    float denom = img * img + target * target + eps;
    return diff * diff / denom;
}
// Backward of fwdRELMSE. Analytic quotient-rule derivatives of
// (i - t)^2 / (i^2 + t^2 + eps):
//   d/di = 2 (i - t) (t (t + i) + eps) / denom^2
//   d/dt = -2 (i - t) (i (t + i) + eps) / denom^2
__device__ inline void bwdRELMSE(float img, float target, float &d_img, float &d_target, float d_out, float eps = 0.1f)
{
    float denom = (target * target + img * img + eps);
    d_img += d_out * 2 * (img - target) * (target * (target + img) + eps) / (denom * denom);
    d_target -= d_out * 2 * (img - target) * (img * (target + img) + eps) / (denom * denom);
}
// SMAPE-style relative error: |img - target| / (img + target + eps).
// NOTE: inputs are assumed non-negative by the callers (they clamp to >= 0),
// hence no abs() in the denominator.
__device__ inline float fwdSMAPE(float img, float target, float eps = 0.01f)
{
    float diff = abs(img - target);
    return diff / (img + target + eps);
}
// Backward of fwdSMAPE. Quotient rule on |i - t| / (i + t + eps) with
// sign(i - t) from bwdAbs:
//   d/di = sign(i - t) (2 t + eps) / denom^2
//   d/dt = -sign(i - t) (2 i + eps) / denom^2
__device__ inline void bwdSMAPE(float img, float target, float& d_img, float& d_target, float d_out, float eps = 0.01f)
{
    float denom = (target + img + eps);
    d_img += d_out * bwdAbs(img - target) * (2 * target + eps) / (denom * denom);
    d_target -= d_out * bwdAbs(img - target) * (2 * img + eps) / (denom * denom);
}
//------------------------------------------------------------------------
// Kernels
// Forward image-loss kernel. Each thread computes one pixel's scalar loss,
// the losses are summed across each warp, and one lane per warp writes the
// partial sum into p.out (which is warp-reduced in each dimension).
__global__ void imgLossFwdKernel(LossKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;

    float floss = 0.0f;
    if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z)
    {
        vec3f img = p.img.fetch3(px, py, pz);
        vec3f target = p.target.fetch3(px, py, pz);

        // Clamp to [0, 65535] so the log-sRGB tonemapper stays finite; the
        // backward kernel zeroes gradients at these clamp boundaries.
        img = vec3f(clamp(img.x, 0.0f, 65535.0f), clamp(img.y, 0.0f, 65535.0f), clamp(img.z, 0.0f, 65535.0f));
        target = vec3f(clamp(target.x, 0.0f, 65535.0f), clamp(target.y, 0.0f, 65535.0f), clamp(target.z, 0.0f, 65535.0f));

        if (p.tonemapper == TONEMAPPER_LOG_SRGB)
        {
            img = fwdTonemapLogSRGB(img);
            target = fwdTonemapLogSRGB(target);
        }

        // Per-channel loss, then averaged over the 3 channels below.
        vec3f vloss(0);
        if (p.loss == LOSS_MSE)
            vloss = (img - target) * (img - target);
        else if (p.loss == LOSS_RELMSE)
            vloss = vec3f(fwdRELMSE(img.x, target.x), fwdRELMSE(img.y, target.y), fwdRELMSE(img.z, target.z));
        else if (p.loss == LOSS_SMAPE)
            vloss = vec3f(fwdSMAPE(img.x, target.x), fwdSMAPE(img.y, target.y), fwdSMAPE(img.z, target.z));
        else
            vloss = vec3f(abs(img.x - target.x), abs(img.y - target.y), abs(img.z - target.z));

        floss = sum(vloss) / 3.0f;
    }

    // Out-of-bounds threads contribute 0 but still participate in the shuffle.
    floss = warpSum(floss);

    // One lane per (logical) warp tile writes the reduced partial sum.
    dim3 warpSize = getWarpSize(blockDim);
    if (px < p.gridSize.x && py < p.gridSize.y && pz < p.gridSize.z && threadIdx.x % warpSize.x == 0 && threadIdx.y % warpSize.y == 0 && threadIdx.z % warpSize.z == 0)
        p.out.store(px / warpSize.x, py / warpSize.y, pz / warpSize.z, floss);
}
// Backward image-loss kernel: one thread per pixel, mirroring imgLossFwdKernel.
// Reads the incoming gradient of the warp-reduced loss from p.out and writes
// full-resolution gradients for both p.img and p.target.
__global__ void imgLossBwdKernel(LossKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;

    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    dim3 warpSize = getWarpSize(blockDim);

    vec3f _img = p.img.fetch3(px, py, pz);
    vec3f _target = p.target.fetch3(px, py, pz);
    // Gradient of the warp-reduced partial sum this pixel contributed to.
    float d_out = p.out.fetch1(px / warpSize.x, py / warpSize.y, pz / warpSize.z);

    /////////////////////////////////////////////////////////////////////
    // FWD (recompute tonemapped values needed by the backward pass)

    vec3f img = _img, target = _target;
    if (p.tonemapper == TONEMAPPER_LOG_SRGB)
    {
        img = fwdTonemapLogSRGB(img);
        target = fwdTonemapLogSRGB(target);
    }

    /////////////////////////////////////////////////////////////////////
    // BWD

    // floss = sum(vloss) / 3 in the forward pass.
    vec3f d_vloss = vec3f(d_out, d_out, d_out) / 3.0f;

    vec3f d_img(0), d_target(0);
    if (p.loss == LOSS_MSE)
    {
        // d/d_img (img - target)^2 = 2 (img - target).
        // BUGFIX: the z component previously scaled by d_vloss.x instead of
        // d_vloss.z (harmless only because all d_vloss components are equal).
        d_img = vec3f(d_vloss.x * 2 * (img.x - target.x), d_vloss.y * 2 * (img.y - target.y), d_vloss.z * 2 * (img.z - target.z));
        d_target = -d_img;
    }
    else if (p.loss == LOSS_RELMSE)
    {
        bwdRELMSE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
        bwdRELMSE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
        bwdRELMSE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
    }
    else if (p.loss == LOSS_SMAPE)
    {
        bwdSMAPE(img.x, target.x, d_img.x, d_target.x, d_vloss.x);
        bwdSMAPE(img.y, target.y, d_img.y, d_target.y, d_vloss.y);
        bwdSMAPE(img.z, target.z, d_img.z, d_target.z, d_vloss.z);
    }
    else // LOSS_L1
    {
        d_img = d_vloss * vec3f(bwdAbs(img.x - target.x), bwdAbs(img.y - target.y), bwdAbs(img.z - target.z));
        d_target = -d_img;
    }

    // Backprop through the tonemapper.
    if (p.tonemapper == TONEMAPPER_LOG_SRGB)
    {
        vec3f d__img(0), d__target(0);
        bwdTonemapLogSRGB(_img, d__img, d_img);
        bwdTonemapLogSRGB(_target, d__target, d_target);
        d_img = d__img; d_target = d__target;
    }

    // Zero the gradient wherever the forward pass clamped inputs to [0, 65535].
    if (_img.x <= 0.0f || _img.x >= 65535.0f) d_img.x = 0;
    if (_img.y <= 0.0f || _img.y >= 65535.0f) d_img.y = 0;
    if (_img.z <= 0.0f || _img.z >= 65535.0f) d_img.z = 0;
    if (_target.x <= 0.0f || _target.x >= 65535.0f) d_target.x = 0;
    if (_target.y <= 0.0f || _target.y >= 65535.0f) d_target.y = 0;
    if (_target.z <= 0.0f || _target.z >= 65535.0f) d_target.z = 0;

    p.img.store_grad(px, py, pz, d_img);
    p.target.store_grad(px, py, pz, d_target);
}

View File

@@ -0,0 +1,38 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "common.h"
// Tonemapping applied to img/target before computing the loss.
enum TonemapperType
{
    TONEMAPPER_NONE = 0,
    TONEMAPPER_LOG_SRGB = 1    // sRGB(log(1 + x)) HDR compression
};
// Per-pixel loss function; LOSS_L1 is the fallback in the kernels.
enum LossType
{
    LOSS_L1 = 0,
    LOSS_MSE = 1,
    LOSS_RELMSE = 2,
    LOSS_SMAPE = 3
};
// Parameters shared by the forward/backward image-loss kernels.
struct LossKernelParams
{
    Tensor img;               // predicted image
    Tensor target;            // reference image
    Tensor out;               // fwd: warp-reduced loss output; bwd: incoming gradient
    dim3 gridSize;            // full-resolution pixel extents
    TonemapperType tonemapper;
    LossType loss;
};

View File

@@ -0,0 +1,94 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include <cuda.h>
#include <stdio.h>
#include "common.h"
#include "mesh.h"
//------------------------------------------------------------------------
// Kernels
// Transform a batch of 3D points (or direction vectors) by per-batch 4x4
// matrices. Points get the translation column and a homogeneous w output
// (4 components); vectors are rotated only (3 components).
__global__ void xfmPointsFwdKernel(XfmKernelParams p)
{
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;    // point index
    unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;    // batch index

    // Stage the 4x4 matrix for this batch entry in shared memory; the first
    // 16 threads each load one element. Note the swapped row/column indexing
    // in mtx[] — presumably a transposed layout matched by the products
    // below; verify against the matrix convention of the callers.
    __shared__ float mtx[4][4];
    if (threadIdx.x < 16)
        mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
    __syncthreads();

    // Bounds check only after __syncthreads() so every thread reaches it.
    if (px >= p.gridSize.x)
        return;

    vec3f pos(
        p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
    );

    if (p.isPoints)
    {
        // Full affine transform including translation (mtx[3][*]) and w row.
        p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0] + mtx[3][0]);
        p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1] + mtx[3][1]);
        p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2] + mtx[3][2]);
        p.out.store(p.out.nhwcIndex(pz, px, 3, 0), pos.x * mtx[0][3] + pos.y * mtx[1][3] + pos.z * mtx[2][3] + mtx[3][3]);
    }
    else
    {
        // Linear part only — no translation, no w output.
        p.out.store(p.out.nhwcIndex(pz, px, 0, 0), pos.x * mtx[0][0] + pos.y * mtx[1][0] + pos.z * mtx[2][0]);
        p.out.store(p.out.nhwcIndex(pz, px, 1, 0), pos.x * mtx[0][1] + pos.y * mtx[1][1] + pos.z * mtx[2][1]);
        p.out.store(p.out.nhwcIndex(pz, px, 2, 0), pos.x * mtx[0][2] + pos.y * mtx[1][2] + pos.z * mtx[2][2]);
    }
}
// Backward of xfmPointsFwdKernel: multiplies the output gradient by the
// transposed matrix and writes per-point input gradients (matrix gradients
// are not computed).
__global__ void xfmPointsBwdKernel(XfmKernelParams p)
{
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int pz = blockIdx.z * blockDim.z + threadIdx.z;

    // Same shared-memory matrix staging as the forward kernel.
    __shared__ float mtx[4][4];
    if (threadIdx.x < 16)
        mtx[threadIdx.x % 4][threadIdx.x / 4] = p.matrix.fetch(p.matrix.nhwcIndex(pz, threadIdx.x / 4, threadIdx.x % 4, 0));
    __syncthreads();

    // Guard after __syncthreads() so all threads participate in the barrier.
    if (px >= p.gridSize.x)
        return;

    vec3f pos(
        p.points.fetch(p.points.nhwcIndex(pz, px, 0, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 1, 0)),
        p.points.fetch(p.points.nhwcIndex(pz, px, 2, 0))
    );

    // NOTE(review): channel 3 of p.out is fetched even in the vector
    // (!isPoints) path where only 3 gradient channels are used — confirm the
    // output tensor always carries 4 channels (or broadcasts) in that case.
    vec4f d_out(
        p.out.fetch(p.out.nhwcIndex(pz, px, 0, 0)),
        p.out.fetch(p.out.nhwcIndex(pz, px, 1, 0)),
        p.out.fetch(p.out.nhwcIndex(pz, px, 2, 0)),
        p.out.fetch(p.out.nhwcIndex(pz, px, 3, 0))
    );

    if (p.isPoints)
    {
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2] + d_out.w * mtx[0][3]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2] + d_out.w * mtx[1][3]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2] + d_out.w * mtx[2][3]);
    }
    else
    {
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 0, 0), d_out.x * mtx[0][0] + d_out.y * mtx[0][1] + d_out.z * mtx[0][2]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 1, 0), d_out.x * mtx[1][0] + d_out.y * mtx[1][1] + d_out.z * mtx[1][2]);
        p.points.store_grad(p.points.nhwcIndexContinuous(pz, px, 2, 0), d_out.x * mtx[2][0] + d_out.y * mtx[2][1] + d_out.z * mtx[2][2]);
    }
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "common.h"
// Parameters for the point/vector transform kernels (mesh.cu).
struct XfmKernelParams
{
    bool isPoints;   // true: affine transform with w output; false: rotate-only vectors
    Tensor points;   // input positions/directions, 3 channels
    Tensor matrix;   // per-batch 4x4 transform
    Tensor out;      // transformed output (4 channels for points, 3 for vectors)
    dim3 gridSize;   // gridSize.x = number of points
};

View File

@@ -0,0 +1,182 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#include "common.h"
#include "normal.h"
#define NORMAL_THRESHOLD 0.1f
//------------------------------------------------------------------------
// Perturb shading normal by tangent frame
// Apply a tangent-space normal-map value to the interpolated TBN frame and
// return the resulting (normalized) shading normal.
__device__ vec3f fwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, bool opengl)
{
    vec3f bitangent = safeNormalize(cross(smooth_tng, smooth_nrm));
    float sgn = opengl ? -1.0f : 1.0f;   // OpenGL vs. DirectX normal-map Y convention
    // z is clamped to >= 0 so the perturbed normal never flips below the surface.
    vec3f nrm = smooth_tng * perturbed_nrm.x
              + bitangent * (sgn * perturbed_nrm.y)
              + smooth_nrm * max(perturbed_nrm.z, 0.0f);
    return safeNormalize(nrm);
}
// Backward of fwdPerturbNormal. Recomputes forward intermediates, then
// accumulates (+=) gradients into d_perturbed_nrm / d_smooth_nrm / d_smooth_tng.
__device__ void bwdPerturbNormal(const vec3f perturbed_nrm, const vec3f smooth_nrm, const vec3f smooth_tng, vec3f &d_perturbed_nrm, vec3f &d_smooth_nrm, vec3f &d_smooth_tng, const vec3f d_out, bool opengl)
{
    ////////////////////////////////////////////////////////////////////////
    // FWD
    vec3f _smooth_bitng = cross(smooth_tng, smooth_nrm);
    vec3f smooth_bitng = safeNormalize(_smooth_bitng);
    vec3f _shading_nrm = smooth_tng * perturbed_nrm.x + (opengl ? -1 : 1) * smooth_bitng * perturbed_nrm.y + smooth_nrm * max(perturbed_nrm.z, 0.0f);

    ////////////////////////////////////////////////////////////////////////
    // BWD
    vec3f d_shading_nrm(0);
    bwdSafeNormalize(_shading_nrm, d_shading_nrm, d_out);

    vec3f d_smooth_bitng(0);

    // max(z, 0) gate: no gradient flows when z was clamped in the forward pass.
    if (perturbed_nrm.z > 0.0f)
    {
        d_smooth_nrm += d_shading_nrm * perturbed_nrm.z;
        d_perturbed_nrm.z += sum(d_shading_nrm * smooth_nrm);
    }

    d_smooth_bitng += (opengl ? -1 : 1) * d_shading_nrm * perturbed_nrm.y;
    d_perturbed_nrm.y += (opengl ? -1 : 1) * sum(d_shading_nrm * smooth_bitng);

    d_smooth_tng += d_shading_nrm * perturbed_nrm.x;
    d_perturbed_nrm.x += sum(d_shading_nrm * smooth_tng);

    // Backprop through the bitangent normalization and the cross product.
    vec3f d__smooth_bitng(0);
    bwdSafeNormalize(_smooth_bitng, d__smooth_bitng, d_smooth_bitng);

    bwdCross(smooth_tng, smooth_nrm, d_smooth_tng, d_smooth_nrm, d__smooth_bitng);
}
//------------------------------------------------------------------------
#define bent_nrm_eps 0.001f
// Blend from the geometric normal toward the smooth (shading) normal as the
// view direction rises above the surface; avoids back-facing shading normals
// at grazing angles.
__device__ vec3f fwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm)
{
    float cos_v = dot(view_vec, smooth_nrm);
    float t = clamp(cos_v / NORMAL_THRESHOLD, 0.0f, 1.0f);
    vec3f blended = geom_nrm * (1.0f - t) + smooth_nrm * t;
    return blended;
}
// Backward of fwdBendNormal. For dp > NORMAL_THRESHOLD the blend is saturated
// at t == 1 (pure smooth normal), so only d_smooth_nrm receives gradient.
__device__ void bwdBendNormal(const vec3f view_vec, const vec3f smooth_nrm, const vec3f geom_nrm, vec3f& d_view_vec, vec3f& d_smooth_nrm, vec3f& d_geom_nrm, const vec3f d_out)
{
    ////////////////////////////////////////////////////////////////////////
    // FWD
    float dp = dot(view_vec, smooth_nrm);
    float t = clamp(dp / NORMAL_THRESHOLD, 0.0f, 1.0f);

    ////////////////////////////////////////////////////////////////////////
    // BWD
    if (dp > NORMAL_THRESHOLD)
        d_smooth_nrm += d_out;
    else
    {
        // geom_nrm * (1.0f - t) + smooth_nrm * t;
        d_geom_nrm += d_out * (1.0f - t);
        d_smooth_nrm += d_out * t;
        // t = clamp(dp / NORMAL_THRESHOLD): zero gradient outside the clamp.
        float d_t = sum(d_out * (smooth_nrm - geom_nrm));
        float d_dp = dp < 0.0f || dp > NORMAL_THRESHOLD ? 0.0f : d_t / NORMAL_THRESHOLD;
        bwdDot(view_vec, smooth_nrm, d_view_vec, d_smooth_nrm, d_dp);
    }
}
//------------------------------------------------------------------------
// Kernels
// Forward kernel: builds the final per-pixel shading normal from interpolated
// geometry attributes and an optional normal-map perturbation.
__global__ void PrepareShadingNormalFwdKernel(PrepareShadingNormalKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;
    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    vec3f pos = p.pos.fetch3(px, py, pz);
    vec3f view_pos = p.view_pos.fetch3(px, py, pz);
    vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
    vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
    vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
    vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);

    // Normalize the interpolated frame and compute the view direction.
    vec3f smooth_nrm = safeNormalize(_smooth_nrm);
    vec3f smooth_tng = safeNormalize(_smooth_tng);
    vec3f view_vec = safeNormalize(view_pos - pos);
    vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);

    // Two-sided shading: flip the normals when viewing the back face.
    vec3f res;
    if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
        res = fwdBendNormal(view_vec, -shading_nrm, -geom_nrm);
    else
        res = fwdBendNormal(view_vec, shading_nrm, geom_nrm);

    p.out.store(px, py, pz, res);
}
// Backward kernel for PrepareShadingNormalFwdKernel: recomputes forward
// intermediates, then backprops through bend -> perturb -> normalizations.
__global__ void PrepareShadingNormalBwdKernel(PrepareShadingNormalKernelParams p)
{
    // Calculate pixel position.
    unsigned int px = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int py = blockIdx.y * blockDim.y + threadIdx.y;
    unsigned int pz = blockIdx.z;
    if (px >= p.gridSize.x || py >= p.gridSize.y || pz >= p.gridSize.z)
        return;

    vec3f pos = p.pos.fetch3(px, py, pz);
    vec3f view_pos = p.view_pos.fetch3(px, py, pz);
    vec3f perturbed_nrm = p.perturbed_nrm.fetch3(px, py, pz);
    vec3f _smooth_nrm = p.smooth_nrm.fetch3(px, py, pz);
    vec3f _smooth_tng = p.smooth_tng.fetch3(px, py, pz);
    vec3f geom_nrm = p.geom_nrm.fetch3(px, py, pz);
    vec3f d_out = p.out.fetch3(px, py, pz);    // incoming gradient

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    // FWD

    vec3f smooth_nrm = safeNormalize(_smooth_nrm);
    vec3f smooth_tng = safeNormalize(_smooth_tng);
    vec3f _view_vec = view_pos - pos;
    vec3f view_vec = safeNormalize(view_pos - pos);

    vec3f shading_nrm = fwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, p.opengl);

    ///////////////////////////////////////////////////////////////////////////////////////////////////
    // BWD

    vec3f d_view_vec(0), d_shading_nrm(0), d_geom_nrm(0);
    if (p.two_sided_shading && dot(view_vec, geom_nrm) < 0.0f)
    {
        // Forward flipped the normals; negate the gradients back.
        bwdBendNormal(view_vec, -shading_nrm, -geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);
        d_shading_nrm = -d_shading_nrm;
        d_geom_nrm = -d_geom_nrm;
    }
    else
        bwdBendNormal(view_vec, shading_nrm, geom_nrm, d_view_vec, d_shading_nrm, d_geom_nrm, d_out);

    vec3f d_perturbed_nrm(0), d_smooth_nrm(0), d_smooth_tng(0);
    bwdPerturbNormal(perturbed_nrm, smooth_nrm, smooth_tng, d_perturbed_nrm, d_smooth_nrm, d_smooth_tng, d_shading_nrm, p.opengl);

    // Backprop through the three safeNormalize calls of the forward pass.
    vec3f d__view_vec(0), d__smooth_nrm(0), d__smooth_tng(0);
    bwdSafeNormalize(_view_vec, d__view_vec, d_view_vec);
    bwdSafeNormalize(_smooth_nrm, d__smooth_nrm, d_smooth_nrm);
    bwdSafeNormalize(_smooth_tng, d__smooth_tng, d_smooth_tng);

    // view_vec = view_pos - pos, hence the sign flip for pos.
    p.pos.store_grad(px, py, pz, -d__view_vec);
    p.view_pos.store_grad(px, py, pz, d__view_vec);
    p.perturbed_nrm.store_grad(px, py, pz, d_perturbed_nrm);
    p.smooth_nrm.store_grad(px, py, pz, d__smooth_nrm);
    p.smooth_tng.store_grad(px, py, pz, d__smooth_tng);
    p.geom_nrm.store_grad(px, py, pz, d_geom_nrm);
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#include "common.h"
// Parameters for the shading-normal preparation kernels (normal.cu).
struct PrepareShadingNormalKernelParams
{
    Tensor pos;              // world-space surface position
    Tensor view_pos;         // camera/view position
    Tensor perturbed_nrm;    // tangent-space normal-map value
    Tensor smooth_nrm;       // interpolated vertex normal (unnormalized)
    Tensor smooth_tng;       // interpolated vertex tangent (unnormalized)
    Tensor geom_nrm;         // face (geometric) normal
    Tensor out;              // resulting shading normal
    dim3 gridSize;
    bool two_sided_shading, opengl;    // back-face flipping; normal-map Y convention
};

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
#if defined(__CUDACC__) && defined(BFLOAT16)
#include <cuda_bf16.h> // bfloat16 is float32 compatible with less mantissa bits
#endif
//---------------------------------------------------------------------------------
// CUDA-side Tensor class for in/out parameter parsing. Can be float32 or bfloat16
struct Tensor
{
    void* val;               // forward-value storage
    void* d_val;             // gradient storage (written by store_grad)
    int dims[4], _dims[4];   // broadcastable dims vs. dense ("continuous") dims
    int strides[4];          // element strides for the NHWC index helpers
    bool fp16;               // true: storage is bfloat16 (BFLOAT16 builds), else float32

#if defined(__CUDA__) && !defined(__CUDA_ARCH__)
    // Host-side default constructor only.
    // NOTE(review): the member-initializer list names fp16 before dims/strides,
    // which mismatches the declaration order (-Wreorder); harmless here since
    // all values are constants.
    Tensor() : val(nullptr), d_val(nullptr), fp16(true), dims{ 0, 0, 0, 0 }, _dims{ 0, 0, 0, 0 }, strides{ 0, 0, 0, 0 } {}
#endif

#ifdef __CUDACC__
    // Helpers to index and read/write a single element
    // _nhwcIndex: raw strided index; nhwcIndex: same but broadcasts any
    // dimension of size 1; nhwcIndexContinuous: dense index into the
    // full-resolution (_dims) layout, used for gradients.
    __device__ inline int _nhwcIndex(int n, int h, int w, int c) const { return n * strides[0] + h * strides[1] + w * strides[2] + c * strides[3]; }
    __device__ inline int nhwcIndex(int n, int h, int w, int c) const { return (dims[0] == 1 ? 0 : n * strides[0]) + (dims[1] == 1 ? 0 : h * strides[1]) + (dims[2] == 1 ? 0 : w * strides[2]) + (dims[3] == 1 ? 0 : c * strides[3]); }
    __device__ inline int nhwcIndexContinuous(int n, int h, int w, int c) const { return ((n * _dims[1] + h) * _dims[2] + w) * _dims[3] + c; }
#ifdef BFLOAT16
    __device__ inline float fetch(unsigned int idx) const { return fp16 ? __bfloat162float(((__nv_bfloat16*)val)[idx]) : ((float*)val)[idx]; }
    __device__ inline void store(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)val)[idx] = __float2bfloat16(_val); else ((float*)val)[idx] = _val; }
    __device__ inline void store_grad(unsigned int idx, float _val) { if (fp16) ((__nv_bfloat16*)d_val)[idx] = __float2bfloat16(_val); else ((float*)d_val)[idx] = _val; }
#else
    __device__ inline float fetch(unsigned int idx) const { return ((float*)val)[idx]; }
    __device__ inline void store(unsigned int idx, float _val) { ((float*)val)[idx] = _val; }
    __device__ inline void store_grad(unsigned int idx, float _val) { ((float*)d_val)[idx] = _val; }
#endif

    //////////////////////////////////////////////////////////////////////////////////////////
    // Fetch, use broadcasting for tensor dimensions of size 1
    __device__ inline float fetch1(unsigned int x, unsigned int y, unsigned int z) const
    {
        return fetch(nhwcIndex(z, y, x, 0));
    }

    __device__ inline vec3f fetch3(unsigned int x, unsigned int y, unsigned int z) const
    {
        return vec3f(
            fetch(nhwcIndex(z, y, x, 0)),
            fetch(nhwcIndex(z, y, x, 1)),
            fetch(nhwcIndex(z, y, x, 2))
        );
    }

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Store, no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
    __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, float _val)
    {
        store(_nhwcIndex(z, y, x, 0), _val);
    }

    __device__ inline void store(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
    {
        store(_nhwcIndex(z, y, x, 0), _val.x);
        store(_nhwcIndex(z, y, x, 1), _val.y);
        store(_nhwcIndex(z, y, x, 2), _val.z);
    }

    /////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Store gradient , no broadcasting here. Assume we output full res gradient and then reduce using torch.sum outside
    __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, float _val)
    {
        store_grad(nhwcIndexContinuous(z, y, x, 0), _val);
    }

    __device__ inline void store_grad(unsigned int x, unsigned int y, unsigned int z, vec3f _val)
    {
        store_grad(nhwcIndexContinuous(z, y, x, 0), _val.x);
        store_grad(nhwcIndexContinuous(z, y, x, 1), _val.y);
        store_grad(nhwcIndexContinuous(z, y, x, 2), _val.z);
    }
#endif
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,109 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
// Minimal 3-component float vector used by the CUDA kernels.
// The POD layout is visible to the host; all methods are device-only.
struct vec3f
{
    float x, y, z;

#ifdef __CUDACC__
    __device__ vec3f() { }
    // Broadcast constructor; also enables implicit float -> vec3f conversion
    // used by the mixed scalar/vector arithmetic in the kernels.
    __device__ vec3f(float v) { x = v; y = v; z = v; }
    __device__ vec3f(float _x, float _y, float _z) { x = _x; y = _y; z = _z; }
    __device__ vec3f(float3 v) { x = v.x; y = v.y; z = v.z; }

    // Component-wise compound assignment.
    __device__ inline vec3f& operator+=(const vec3f& b) { x += b.x; y += b.y; z += b.z; return *this; }
    __device__ inline vec3f& operator-=(const vec3f& b) { x -= b.x; y -= b.y; z -= b.z; return *this; }
    __device__ inline vec3f& operator*=(const vec3f& b) { x *= b.x; y *= b.y; z *= b.z; return *this; }
    __device__ inline vec3f& operator/=(const vec3f& b) { x /= b.x; y /= b.y; z /= b.z; return *this; }
#endif
};
#ifdef __CUDACC__
// Component-wise binary arithmetic and unary negation for vec3f.
__device__ static inline vec3f operator+(const vec3f& a, const vec3f& b) { return vec3f(a.x + b.x, a.y + b.y, a.z + b.z); }
__device__ static inline vec3f operator-(const vec3f& a, const vec3f& b) { return vec3f(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ static inline vec3f operator*(const vec3f& a, const vec3f& b) { return vec3f(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ static inline vec3f operator/(const vec3f& a, const vec3f& b) { return vec3f(a.x / b.x, a.y / b.y, a.z / b.z); }
__device__ static inline vec3f operator-(const vec3f& a) { return vec3f(-a.x, -a.y, -a.z); }
// Sum of components; sum(a * b) is used elsewhere as a dot product.
__device__ static inline float sum(vec3f a)
{
    return a.x + a.y + a.z;
}
// Right-handed cross product a x b.
__device__ static inline vec3f cross(vec3f a, vec3f b)
{
    return vec3f(a.y * b.z - a.z * b.y,
                 a.z * b.x - a.x * b.z,
                 a.x * b.y - a.y * b.x);
}
// Backward of cross(a, b): accumulates the adjoints
// d_a += d_out x b (component form) and d_b += a x d_out.
__device__ static inline void bwdCross(vec3f a, vec3f b, vec3f &d_a, vec3f &d_b, vec3f d_out)
{
    d_a.x += d_out.z * b.y - d_out.y * b.z;
    d_a.y += d_out.x * b.z - d_out.z * b.x;
    d_a.z += d_out.y * b.x - d_out.x * b.y;
    d_b.x += d_out.y * a.z - d_out.z * a.y;
    d_b.y += d_out.z * a.x - d_out.x * a.z;
    d_b.z += d_out.x * a.y - d_out.y * a.x;
}
// Inner product <a, b>, expressed via the component-wise product and sum
// (identical evaluation order to the expanded form).
__device__ static inline float dot(vec3f a, vec3f b)
{
    return sum(a * b);
}
// Backward of dot(a, b): d_a += d_out * b and d_b += d_out * a.
__device__ static inline void bwdDot(vec3f a, vec3f b, vec3f& d_a, vec3f& d_b, float d_out)
{
    d_a.x += d_out * b.x; d_a.y += d_out * b.y; d_a.z += d_out * b.z;
    d_b.x += d_out * a.x; d_b.y += d_out * a.y; d_b.z += d_out * a.z;
}
// Reflection of x about n: 2 <n, x> n - x (the "outgoing direction"
// convention; n is expected to be unit length).
__device__ static inline vec3f reflect(vec3f x, vec3f n)
{
    float ndotx = dot(n, x);
    return n * (2.0f * ndotx) - x;
}
// Backward of reflect(x, n): fully expanded Jacobian-transpose products for
// both inputs (d_x via 2 n n^T - I, d_n via the symmetric outer-product terms).
__device__ static inline void bwdReflect(vec3f x, vec3f n, vec3f& d_x, vec3f& d_n, const vec3f d_out)
{
    d_x.x += d_out.x * (2 * n.x * n.x - 1) + d_out.y * (2 * n.x * n.y) + d_out.z * (2 * n.x * n.z);
    d_x.y += d_out.x * (2 * n.x * n.y) + d_out.y * (2 * n.y * n.y - 1) + d_out.z * (2 * n.y * n.z);
    d_x.z += d_out.x * (2 * n.x * n.z) + d_out.y * (2 * n.y * n.z) + d_out.z * (2 * n.z * n.z - 1);
    d_n.x += d_out.x * (2 * (2 * n.x * x.x + n.y * x.y + n.z * x.z)) + d_out.y * (2 * n.y * x.x) + d_out.z * (2 * n.z * x.x);
    d_n.y += d_out.x * (2 * n.x * x.y) + d_out.y * (2 * (n.x * x.x + 2 * n.y * x.y + n.z * x.z)) + d_out.z * (2 * n.z * x.y);
    d_n.z += d_out.x * (2 * n.x * x.z) + d_out.y * (2 * n.y * x.z) + d_out.z * (2 * (n.x * x.x + n.y * x.y + 2 * n.z * x.z));
}
// Normalize v; returns the zero vector instead of NaN when |v| == 0.
__device__ static inline vec3f safeNormalize(vec3f v)
{
    float len = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    if (len > 0.0f)
        return v / len;
    return vec3f(0.0f);
}
// Backward of safeNormalize: accumulates J^T(v) * d_out into d_v, where the
// Jacobian of v / |v| is (I - v v^T / |v|^2) / |v|, i.e. a common 1 / |v|^3
// factor over the expanded terms below. No gradient at v == 0 (forward
// returned the constant zero vector there).
__device__ static inline void bwdSafeNormalize(const vec3f v, vec3f& d_v, const vec3f d_out)
{
    float l = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z);
    if (l > 0.0f)
    {
        // Fix: compute 1 / |v|^3 in single precision from the already-computed
        // length, instead of the former '1.0 / powf(..., 1.5f)' whose double
        // literal forced a double-precision division (and a redundant powf).
        float fac = 1.0f / (l * l * l);
        d_v.x += (d_out.x * (v.y * v.y + v.z * v.z) - d_out.y * (v.x * v.y) - d_out.z * (v.x * v.z)) * fac;
        d_v.y += (d_out.y * (v.x * v.x + v.z * v.z) - d_out.x * (v.y * v.x) - d_out.z * (v.y * v.z)) * fac;
        d_v.z += (d_out.z * (v.x * v.x + v.y * v.y) - d_out.x * (v.z * v.x) - d_out.y * (v.z * v.y)) * fac;
    }
}
#endif

View File

@@ -0,0 +1,25 @@
/*
* Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
// Minimal 4-component float vector (homogeneous coordinates in mesh.cu).
// POD layout visible to the host; constructors are device-only.
struct vec4f
{
    float x, y, z, w;

#ifdef __CUDACC__
    __device__ vec4f() { }
    // Broadcast constructor (fills all four components).
    __device__ vec4f(float v) { x = v; y = v; z = v; w = v; }
    __device__ vec4f(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; }
    __device__ vec4f(float4 v) { x = v.x; y = v.y; z = v.z; w = v.w; }
#endif
};

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
#----------------------------------------------------------------------------
# HDR image losses
#----------------------------------------------------------------------------
def _tonemap_srgb(f):
return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)
def _SMAPE(img, target, eps=0.01):
nom = torch.abs(img - target)
denom = torch.abs(img) + torch.abs(target) + 0.01
return torch.mean(nom / denom)
def _RELMSE(img, target, eps=0.1):
nom = (img - target) * (img - target)
denom = img * img + target * target + 0.1
return torch.mean(nom / denom)
def image_loss_fn(img, target, loss, tonemapper):
    """Compute an image-space training loss.

    Args:
        img: predicted (HDR) image tensor.
        target: reference image tensor.
        loss: one of 'mse', 'smape', 'relmse'; anything else selects L1.
        tonemapper: 'log_srgb' applies log + sRGB compression first;
            any other value leaves the inputs untouched.

    Returns:
        Scalar loss tensor.
    """
    if tonemapper == 'log_srgb':
        img = _tonemap_srgb(torch.log(torch.clamp(img, min=0, max=65535) + 1))
        target = _tonemap_srgb(torch.log(torch.clamp(target, min=0, max=65535) + 1))

    if loss == 'smape':
        return _SMAPE(img, target)
    if loss == 'relmse':
        return _RELMSE(img, target)
    if loss == 'mse':
        return torch.nn.functional.mse_loss(img, target)
    return torch.nn.functional.l1_loss(img, target)

554
render/renderutils/ops.py Normal file
View File

@@ -0,0 +1,554 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import numpy as np
import os
import sys
import torch
import torch.utils.cpp_extension
from .bsdf import *
from .loss import *
#----------------------------------------------------------------------------
# C++/Cuda plugin compiler/loader.
_cached_plugin = None
def _get_plugin():
    """Compile (if needed), load, and cache the renderutils CUDA extension.

    Returns the compiled ``renderutils_plugin`` module. The first call JIT
    compiles the c_src/*.cu sources via torch.utils.cpp_extension; later
    calls return the cached module.
    """
    # Return cached plugin if already loaded.
    global _cached_plugin
    if _cached_plugin is not None:
        return _cached_plugin

    # Make sure we can find the necessary compiler and libary binaries.
    if os.name == 'nt':
        def find_cl_path():
            # Locate an MSVC host-x64 toolchain, preferring newer editions.
            import glob
            for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']:
                paths = sorted(glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition), reverse=True)
                if paths:
                    return paths[0]

        # If cl.exe is not on path, try to find it.
        if os.system("where cl.exe >nul 2>nul") != 0:
            cl_path = find_cl_path()
            if cl_path is None:
                raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
            os.environ['PATH'] += ';' + cl_path

    # Compiler options.
    opts = ['-DNVDR_TORCH']

    # Linker options.
    if os.name == 'posix':
        ldflags = ['-lcuda', '-lnvrtc']
    elif os.name == 'nt':
        ldflags = ['cuda.lib', 'advapi32.lib', 'nvrtc.lib']

    # List of sources.
    source_files = [
        'c_src/mesh.cu',
        'c_src/loss.cu',
        'c_src/bsdf.cu',
        'c_src/normal.cu',
        'c_src/cubemap.cu',
        'c_src/common.cpp',
        'c_src/torch_bindings.cpp'
    ]

    # Some containers set this to contain old architectures that won't compile. We only need the one installed in the machine.
    os.environ['TORCH_CUDA_ARCH_LIST'] = ''

    # Try to detect if a stray lock file is left in cache directory and show a warning. This sometimes happens on Windows if the build is interrupted at just the right moment.
    try:
        lock_fn = os.path.join(torch.utils.cpp_extension._get_build_directory('renderutils_plugin', False), 'lock')
        if os.path.exists(lock_fn):
            print("Warning: Lock file exists in build directory: '%s'" % lock_fn)
    except Exception:
        # Best-effort check only; a bare 'except:' previously swallowed even
        # KeyboardInterrupt/SystemExit here.
        pass

    # Compile and load.
    source_paths = [os.path.join(os.path.dirname(__file__), fn) for fn in source_files]
    torch.utils.cpp_extension.load(name='renderutils_plugin', sources=source_paths, extra_cflags=opts,
                                   extra_cuda_cflags=opts, extra_ldflags=ldflags, with_cuda=True, verbose=True)

    # Import, cache, and return the compiled module.
    import renderutils_plugin
    _cached_plugin = renderutils_plugin
    return _cached_plugin
#----------------------------------------------------------------------------
# Internal kernels, just used for testing functionality
class _fresnel_shlick_func(torch.autograd.Function):
    """Autograd binding for the CUDA Schlick Fresnel kernels (fwd/bwd)."""
    @staticmethod
    def forward(ctx, f0, f90, cosTheta):
        # Trailing boolean flag is forwarded to the plugin kernel
        # (meaning defined in c_src/torch_bindings.cpp).
        out = _get_plugin().fresnel_shlick_fwd(f0, f90, cosTheta, False)
        ctx.save_for_backward(f0, f90, cosTheta)
        return out

    @staticmethod
    def backward(ctx, dout):
        # NOTE(review): ctx.saved_variables is deprecated; ctx.saved_tensors
        # is the modern accessor (same contents).
        f0, f90, cosTheta = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().fresnel_shlick_bwd(f0, f90, cosTheta, dout) + (None,)
def _fresnel_shlick(f0, f90, cosTheta, use_python=False):
    """Schlick's Fresnel approximation (internal, used for testing).

    Args:
        f0: Reflectance at normal incidence.
        f90: Reflectance at grazing incidence.
        cosTheta: Cosine of the incidence angle.
        use_python: Use the PyTorch reference implementation (for validation).

    Returns:
        Fresnel reflectance term.
    """
    impl = bsdf_fresnel_shlick if use_python else _fresnel_shlick_func.apply
    result = impl(f0, f90, cosTheta)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(result)), "Output of _fresnel_shlick contains inf or NaN"
    return result
class _ndf_ggx_func(torch.autograd.Function):
    """Autograd binding for the CUDA GGX normal-distribution (D term) kernels."""
    @staticmethod
    def forward(ctx, alphaSqr, cosTheta):
        # Trailing boolean flag is forwarded to the plugin kernel.
        out = _get_plugin().ndf_ggx_fwd(alphaSqr, cosTheta, False)
        ctx.save_for_backward(alphaSqr, cosTheta)
        return out

    @staticmethod
    def backward(ctx, dout):
        alphaSqr, cosTheta = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().ndf_ggx_bwd(alphaSqr, cosTheta, dout) + (None,)
def _ndf_ggx(alphaSqr, cosTheta, use_python=False):
    """GGX normal distribution function, D term (internal, used for testing)."""
    compute = bsdf_ndf_ggx if use_python else _ndf_ggx_func.apply
    result = compute(alphaSqr, cosTheta)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(result)), "Output of _ndf_ggx contains inf or NaN"
    return result
class _lambda_ggx_func(torch.autograd.Function):
    """Autograd binding for the CUDA GGX lambda (masking auxiliary) kernels."""
    @staticmethod
    def forward(ctx, alphaSqr, cosTheta):
        # Trailing boolean flag is forwarded to the plugin kernel.
        out = _get_plugin().lambda_ggx_fwd(alphaSqr, cosTheta, False)
        ctx.save_for_backward(alphaSqr, cosTheta)
        return out

    @staticmethod
    def backward(ctx, dout):
        alphaSqr, cosTheta = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().lambda_ggx_bwd(alphaSqr, cosTheta, dout) + (None,)
def _lambda_ggx(alphaSqr, cosTheta, use_python=False):
    """GGX lambda term (internal, used for testing)."""
    fn = bsdf_lambda_ggx if use_python else _lambda_ggx_func.apply
    value = fn(alphaSqr, cosTheta)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(value)), "Output of _lambda_ggx contains inf or NaN"
    return value
class _masking_smith_func(torch.autograd.Function):
    """Autograd binding for the CUDA height-correlated Smith masking kernels."""
    @staticmethod
    def forward(ctx, alphaSqr, cosThetaI, cosThetaO):
        ctx.save_for_backward(alphaSqr, cosThetaI, cosThetaO)
        # Trailing boolean flag is forwarded to the plugin kernel.
        out = _get_plugin().masking_smith_fwd(alphaSqr, cosThetaI, cosThetaO, False)
        return out

    @staticmethod
    def backward(ctx, dout):
        alphaSqr, cosThetaI, cosThetaO = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().masking_smith_bwd(alphaSqr, cosThetaI, cosThetaO, dout) + (None,)
def _masking_smith(alphaSqr, cosThetaI, cosThetaO, use_python=False):
    """Height-correlated Smith masking-shadowing term (internal, for testing)."""
    if use_python:
        result = bsdf_masking_smith_ggx_correlated(alphaSqr, cosThetaI, cosThetaO)
    else:
        result = _masking_smith_func.apply(alphaSqr, cosThetaI, cosThetaO)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(result)), "Output of _masking_smith contains inf or NaN"
    return result
#----------------------------------------------------------------------------
# Shading normal setup (bump mapping + bent normals)
class _prepare_shading_normal_func(torch.autograd.Function):
    """Autograd binding for the CUDA shading-normal preparation kernels."""
    @staticmethod
    def forward(ctx, pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl):
        # Non-tensor flags cannot go through save_for_backward; stash on ctx.
        ctx.two_sided_shading, ctx.opengl = two_sided_shading, opengl
        out = _get_plugin().prepare_shading_normal_fwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl, False)
        ctx.save_for_backward(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm)
        return out

    @staticmethod
    def backward(ctx, dout):
        pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm = ctx.saved_variables
        # Trailing Nones pad the gradient tuple; the boolean flags receive no gradient.
        return _get_plugin().prepare_shading_normal_bwd(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, dout, ctx.two_sided_shading, ctx.opengl) + (None, None, None)
def prepare_shading_normal(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading=True, opengl=True, use_python=False):
    '''Produce the final normal used for shading, handling all corner cases:
        - Constructs tangent space
        - Flips normal direction based on geometric normal for two sided Shading
        - Perturbs shading normal by normal map
        - Bends backfacing normals towards the camera to avoid shading artifacts

    All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent.

    Args:
        pos: World space g-buffer position.
        view_pos: Camera position in world space (typically using broadcasting).
        perturbed_nrm: Tangent-space normal perturbation from normal map lookup,
            or None for an unperturbed (0, 0, 1) normal.
        smooth_nrm: Interpolated vertex normals.
        smooth_tng: Interpolated vertex tangents.
        geom_nrm: Geometric (face) normals.
        two_sided_shading: Use one/two sided shading
        opengl: Use OpenGL/DirectX normal map conventions
        use_python: Use PyTorch implementation (for validation)
    Returns:
        Final shading normal
    '''
    if perturbed_nrm is None:
        # No normal map: use the tangent-space "up" vector, broadcast over
        # [minibatch, height, width].
        perturbed_nrm = torch.tensor([0, 0, 1], dtype=torch.float32, device='cuda', requires_grad=False)[None, None, None, ...]

    fn = bsdf_prepare_shading_normal if use_python else _prepare_shading_normal_func.apply
    result = fn(pos, view_pos, perturbed_nrm, smooth_nrm, smooth_tng, geom_nrm, two_sided_shading, opengl)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(result)), "Output of prepare_shading_normal contains inf or NaN"
    return result
#----------------------------------------------------------------------------
# BSDF functions
class _lambert_func(torch.autograd.Function):
    """Autograd binding for the CUDA Lambert diffuse kernels."""
    @staticmethod
    def forward(ctx, nrm, wi):
        # Trailing boolean flag is forwarded to the plugin kernel.
        out = _get_plugin().lambert_fwd(nrm, wi, False)
        ctx.save_for_backward(nrm, wi)
        return out

    @staticmethod
    def backward(ctx, dout):
        nrm, wi = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().lambert_bwd(nrm, wi, dout) + (None,)
def lambert(nrm, wi, use_python=False):
    '''Lambertian bsdf.

    All tensors assume a shape of [minibatch_size, height, width, 3] or a
    broadcastable equivalent.

    Args:
        nrm: World space shading normal.
        wi: World space light vector.
        use_python: Use PyTorch implementation (for validation)

    Returns:
        Shaded diffuse value with shape [minibatch_size, height, width, 1]
    '''
    fn = bsdf_lambert if use_python else _lambert_func.apply
    shaded = fn(nrm, wi)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(shaded)), "Output of lambert contains inf or NaN"
    return shaded
class _frostbite_diffuse_func(torch.autograd.Function):
    """Autograd binding for the CUDA Frostbite (normalized Disney) diffuse kernels."""
    @staticmethod
    def forward(ctx, nrm, wi, wo, linearRoughness):
        # Trailing boolean flag is forwarded to the plugin kernel.
        out = _get_plugin().frostbite_fwd(nrm, wi, wo, linearRoughness, False)
        ctx.save_for_backward(nrm, wi, wo, linearRoughness)
        return out

    @staticmethod
    def backward(ctx, dout):
        nrm, wi, wo, linearRoughness = ctx.saved_variables
        # Trailing None pads the gradient tuple for inputs without gradients.
        return _get_plugin().frostbite_bwd(nrm, wi, wo, linearRoughness, dout) + (None,)
def frostbite_diffuse(nrm, wi, wo, linearRoughness, use_python=False):
    '''Frostbite, normalized Disney Diffuse bsdf.

    All tensors assume a shape of [minibatch_size, height, width, 3] or broadcastable equivalent.

    Args:
        nrm: World space shading normal.
        wi: World space light vector.
        wo: World space camera vector.
        linearRoughness: Material roughness
        use_python: Use PyTorch implementation (for validation)

    Returns:
        Shaded diffuse value with shape [minibatch_size, height, width, 1]
    '''
    if use_python:
        out = bsdf_frostbite(nrm, wi, wo, linearRoughness)
    else:
        out = _frostbite_diffuse_func.apply(nrm, wi, wo, linearRoughness)

    if torch.is_anomaly_enabled():
        # Bug fix: message previously read "Output of lambert" (copy/paste error).
        assert torch.all(torch.isfinite(out)), "Output of frostbite_diffuse contains inf or NaN"
    return out
class _pbr_specular_func(torch.autograd.Function):
    """Autograd binding for the CUDA PBR specular lobe kernels."""
    @staticmethod
    def forward(ctx, col, nrm, wo, wi, alpha, min_roughness):
        ctx.save_for_backward(col, nrm, wo, wi, alpha)
        # Scalar stashed on ctx (save_for_backward is tensors-only).
        ctx.min_roughness = min_roughness
        out = _get_plugin().pbr_specular_fwd(col, nrm, wo, wi, alpha, min_roughness, False)
        return out

    @staticmethod
    def backward(ctx, dout):
        col, nrm, wo, wi, alpha = ctx.saved_variables
        # Trailing Nones pad the gradient tuple; min_roughness gets no gradient.
        return _get_plugin().pbr_specular_bwd(col, nrm, wo, wi, alpha, ctx.min_roughness, dout) + (None, None)
def pbr_specular(col, nrm, wo, wi, alpha, min_roughness=0.08, use_python=False):
    '''Physically-based specular bsdf.

    All tensors assume a shape of [minibatch_size, height, width, 3] or
    broadcastable equivalent unless otherwise noted.

    Args:
        col: Specular lobe color
        nrm: World space shading normal.
        wo: World space camera vector.
        wi: World space light vector
        alpha: Specular roughness parameter with shape [minibatch_size, height, width, 1]
        min_roughness: Scalar roughness clamping threshold
        use_python: Use PyTorch implementation (for validation)

    Returns:
        Shaded specular color
    '''
    if use_python:
        result = bsdf_pbr_specular(col, nrm, wo, wi, alpha, min_roughness=min_roughness)
    else:
        result = _pbr_specular_func.apply(col, nrm, wo, wi, alpha, min_roughness)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(result)), "Output of pbr_specular contains inf or NaN"
    return result
class _pbr_bsdf_func(torch.autograd.Function):
    """Autograd binding for the full CUDA PBR BSDF (diffuse + specular) kernels."""
    @staticmethod
    def forward(ctx, kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF):
        ctx.save_for_backward(kd, arm, pos, nrm, view_pos, light_pos)
        # Scalar / enum arguments stashed on ctx (save_for_backward is tensors-only).
        ctx.min_roughness = min_roughness
        ctx.BSDF = BSDF
        out = _get_plugin().pbr_bsdf_fwd(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF, False)
        return out

    @staticmethod
    def backward(ctx, dout):
        kd, arm, pos, nrm, view_pos, light_pos = ctx.saved_variables
        # Trailing Nones pad the gradient tuple; min_roughness/BSDF get no gradient.
        return _get_plugin().pbr_bsdf_bwd(kd, arm, pos, nrm, view_pos, light_pos, ctx.min_roughness, ctx.BSDF, dout) + (None, None, None)
def pbr_bsdf(kd, arm, pos, nrm, view_pos, light_pos, min_roughness=0.08, bsdf="lambert", use_python=False):
    '''Physically-based bsdf, both diffuse & specular lobes

    All tensors assume a shape of [minibatch_size, height, width, 3] or
    broadcastable equivalent unless otherwise noted.

    Args:
        kd: Diffuse albedo.
        arm: Specular parameters (attenuation, linear roughness, metalness).
        pos: World space position.
        nrm: World space shading normal.
        view_pos: Camera position in world space, typically using broadcasting.
        light_pos: Light position in world space, typically using broadcasting.
        min_roughness: Scalar roughness clamping threshold
        bsdf: Controls diffuse BSDF, can be either 'lambert' or 'frostbite'
        use_python: Use PyTorch implementation (for validation)

    Returns:
        Shaded color.
    '''
    # Map the diffuse model name onto the integer id expected by the kernel.
    BSDF = 1 if bsdf == 'frostbite' else 0

    fn = bsdf_pbr if use_python else _pbr_bsdf_func.apply
    shaded = fn(kd, arm, pos, nrm, view_pos, light_pos, min_roughness, BSDF)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(shaded)), "Output of pbr_bsdf contains inf or NaN"
    return shaded
#----------------------------------------------------------------------------
# cubemap filter with filtering across edges
class _diffuse_cubemap_func(torch.autograd.Function):
    """Autograd binding for diffuse (cosine-weighted) cubemap filtering kernels."""
    @staticmethod
    def forward(ctx, cubemap):
        out = _get_plugin().diffuse_cubemap_fwd(cubemap)
        ctx.save_for_backward(cubemap)
        return out

    @staticmethod
    def backward(ctx, dout):
        cubemap, = ctx.saved_variables
        cubemap_grad = _get_plugin().diffuse_cubemap_bwd(cubemap, dout)
        # Trailing None pads the gradient tuple.
        return cubemap_grad, None
def diffuse_cubemap(cubemap, use_python=False):
    """Diffusely (cosine-weighted) filter a cubemap, filtering across face edges.

    Args:
        cubemap: Cubemap tensor with 6 faces in the leading dimension.
        use_python: Unsupported; no PyTorch reference implementation exists.

    Returns:
        Filtered cubemap with the same shape as the input.

    Raises:
        NotImplementedError: If use_python is True.
    """
    if use_python:
        # Was `assert False`, which is stripped under `python -O`, leaving
        # `out` unbound (NameError). Raise explicitly instead.
        raise NotImplementedError("diffuse_cubemap has no PyTorch reference implementation")
    out = _diffuse_cubemap_func.apply(cubemap)
    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of diffuse_cubemap contains inf or NaN"
    return out
class _specular_cubemap(torch.autograd.Function):
    """Autograd binding for GGX specular cubemap pre-filtering kernels."""
    @staticmethod
    def forward(ctx, cubemap, roughness, costheta_cutoff, bounds):
        out = _get_plugin().specular_cubemap_fwd(cubemap, bounds, roughness, costheta_cutoff)
        ctx.save_for_backward(cubemap, bounds)
        # Scalars stashed on ctx (save_for_backward is tensors-only).
        ctx.roughness, ctx.theta_cutoff = roughness, costheta_cutoff
        return out

    @staticmethod
    def backward(ctx, dout):
        cubemap, bounds = ctx.saved_variables
        cubemap_grad = _get_plugin().specular_cubemap_bwd(cubemap, bounds, dout, ctx.roughness, ctx.theta_cutoff)
        # Only the cubemap receives a gradient; roughness/cutoff/bounds do not.
        return cubemap_grad, None, None, None
# Compute the bounds of the GGX NDF lobe to retain "cutoff" percent of the energy
def __ndfBounds(res, roughness, cutoff):
    """Return (cos-theta cutoff, per-texel bounds table) for GGX pre-filtering.

    Samples the GGX NDF densely over the hemisphere, finds the angle at which
    the cumulative energy first reaches `cutoff`, then asks the plugin to
    brute-force a bounds lookup table for a cubemap of resolution `res`.
    """
    def ggx_ndf(alpha_sqr, cos_t):
        cos_t = np.clip(cos_t, 0.0, 1.0)
        denom = (cos_t * alpha_sqr - cos_t) * cos_t + 1.0
        return alpha_sqr / (denom * denom * np.pi)

    # Sample out cutoff angle: cumulative NDF mass over a dense cos(theta) grid.
    n_samples = 1000000
    cos_thetas = np.cos(np.linspace(0, np.pi / 2.0, n_samples))
    cdf = np.cumsum(ggx_ndf(roughness**4, cos_thetas))
    cutoff_idx = np.argmax(cdf >= cdf[..., -1] * cutoff)

    # Brute force compute lookup table with bounds for this cutoff angle.
    bounds = _get_plugin().specular_bounds(res, cos_thetas[cutoff_idx])

    return cos_thetas[cutoff_idx], bounds

__ndfBoundsDict = {}
def specular_cubemap(cubemap, roughness, cutoff=0.99, use_python=False):
    """GGX pre-filter a cubemap for split-sum specular shading.

    Args:
        cubemap: Cubemap tensor of shape [6, res, res, C] with square faces.
        roughness: Scalar GGX roughness for this mip level.
        cutoff: Fraction of NDF energy to retain when bounding the filter lobe.
        use_python: Unsupported; no PyTorch reference implementation exists.

    Returns:
        Filtered cubemap color, normalized by the accumulated filter weight
        stored in the last output channel.

    Raises:
        NotImplementedError: If use_python is True.
    """
    assert cubemap.shape[0] == 6 and cubemap.shape[1] == cubemap.shape[2], "Bad shape for cubemap tensor: %s" % str(cubemap.shape)

    if use_python:
        # Was `assert False`, which is stripped under `python -O`, leaving
        # `out` unbound (NameError). Raise explicitly instead.
        raise NotImplementedError("specular_cubemap has no PyTorch reference implementation")

    # Cache (costheta cutoff, bounds table) per (resolution, roughness, cutoff).
    key = (cubemap.shape[1], roughness, cutoff)
    if key not in __ndfBoundsDict:
        __ndfBoundsDict[key] = __ndfBounds(*key)
    out = _specular_cubemap.apply(cubemap, roughness, *__ndfBoundsDict[key])

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of specular_cubemap contains inf or NaN"

    # Divide accumulated color (channels 0-2) by accumulated weight (channel 3).
    return out[..., 0:3] / out[..., 3:]
#----------------------------------------------------------------------------
# Fast image loss function
class _image_loss_func(torch.autograd.Function):
    """Autograd binding for the fused tonemap + image loss kernels."""
    @staticmethod
    def forward(ctx, img, target, loss, tonemapper):
        # Mode strings stashed on ctx (save_for_backward is tensors-only).
        ctx.loss, ctx.tonemapper = loss, tonemapper
        ctx.save_for_backward(img, target)
        out = _get_plugin().image_loss_fwd(img, target, loss, tonemapper, False)
        return out

    @staticmethod
    def backward(ctx, dout):
        img, target = ctx.saved_variables
        # Trailing Nones pad the gradient tuple; the mode strings get no gradient.
        return _get_plugin().image_loss_bwd(img, target, dout, ctx.loss, ctx.tonemapper) + (None, None, None)
def image_loss(img, target, loss='l1', tonemapper='none', use_python=False):
    '''Compute HDR image loss. Combines tonemapping and loss into a single kernel for better perf.

    All tensors assume a shape of [minibatch_size, height, width, 3] or
    broadcastable equivalent unless otherwise noted.

    Args:
        img: Input image.
        target: Target (reference) image.
        loss: Type of loss. Valid options are ['l1', 'mse', 'smape', 'relmse']
        tonemapper: Tonemapping operations. Valid options are ['none', 'log_srgb']
        use_python: Use PyTorch implementation (for validation)

    Returns:
        Image space loss (scalar value).
    '''
    fn = image_loss_fn if use_python else _image_loss_func.apply
    out = fn(img, target, loss, tonemapper)

    # Normalize the summed per-pixel loss by batch * height * width.
    num_pixels = img.shape[0] * img.shape[1] * img.shape[2]
    out = torch.sum(out) / num_pixels

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of image_loss contains inf or NaN"
    return out
#----------------------------------------------------------------------------
# Transform points function
class _xfm_func(torch.autograd.Function):
    """Autograd binding for the CUDA point/vector transform kernels."""
    @staticmethod
    def forward(ctx, points, matrix, isPoints):
        ctx.save_for_backward(points, matrix)
        # Bool flag stashed on ctx (save_for_backward is tensors-only).
        ctx.isPoints = isPoints
        return _get_plugin().xfm_fwd(points, matrix, isPoints, False)

    @staticmethod
    def backward(ctx, dout):
        points, matrix = ctx.saved_variables
        # Only `points` receives a gradient; matrix and flags get None.
        return (_get_plugin().xfm_bwd(points, matrix, dout, ctx.isPoints),) + (None, None, None)
def xfm_points(points, matrix, use_python=False):
    '''Transform points.

    Args:
        points: Tensor containing 3D points with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3]
        matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4]
        use_python: Use PyTorch's torch.matmul (for validation)

    Returns:
        Transformed points in homogeneous 4D with shape [minibatch_size, num_vertices, 4].
    '''
    if use_python:
        # Append w=1 to make points homogeneous, then right-multiply by M^T.
        homogeneous = torch.nn.functional.pad(points, pad=(0, 1), mode='constant', value=1.0)
        out = torch.matmul(homogeneous, torch.transpose(matrix, 1, 2))
    else:
        out = _xfm_func.apply(points, matrix, True)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of xfm_points contains inf or NaN"
    return out
def xfm_vectors(vectors, matrix, use_python=False):
    '''Transform vectors.

    Args:
        vectors: Tensor containing 3D vectors with shape [minibatch_size, num_vertices, 3] or [1, num_vertices, 3]
        matrix: A 4x4 transform matrix with shape [minibatch_size, 4, 4]
        use_python: Use PyTorch's torch.matmul (for validation)

    Returns:
        Transformed vectors in homogeneous 4D with shape [minibatch_size, num_vertices, 4].
    '''
    if use_python:
        # Append w=0 so translation does not affect direction vectors, then
        # right-multiply by M^T and drop the homogeneous coordinate.
        homogeneous = torch.nn.functional.pad(vectors, pad=(0, 1), mode='constant', value=0.0)
        out = torch.matmul(homogeneous, torch.transpose(matrix, 1, 2))[..., 0:3].contiguous()
    else:
        out = _xfm_func.apply(vectors, matrix, False)

    if torch.is_anomaly_enabled():
        assert torch.all(torch.isfinite(out)), "Output of xfm_vectors contains inf or NaN"
    return out

View File

@@ -0,0 +1,296 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import os
import sys
sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru
RES = 4
DTYPE = torch.float32
def relative_loss(name, ref, cuda):
    """Print the maximum elementwise relative difference between two tensors."""
    ref, cuda = ref.float(), cuda.float()
    rel_err = torch.abs(ref - cuda) / torch.abs(ref + 1e-7)
    print(name, rel_err.max().item())
def test_normal():
    """Compare ru.prepare_shading_normal: CUDA kernel vs PyTorch reference.

    Builds paired leaf tensors (identical values, independent gradients),
    backpropagates an MSE loss against a random target through both paths,
    and prints relative errors for the output and every input gradient.
    """
    # Paired inputs: *_cuda feeds the CUDA kernel, *_ref the python reference.
    pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    pos_ref = pos_cuda.clone().detach().requires_grad_(True)
    view_pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    view_pos_ref = view_pos_cuda.clone().detach().requires_grad_(True)
    perturbed_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    perturbed_nrm_ref = perturbed_nrm_cuda.clone().detach().requires_grad_(True)
    smooth_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    smooth_nrm_ref = smooth_nrm_cuda.clone().detach().requires_grad_(True)
    smooth_tng_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    smooth_tng_ref = smooth_tng_cuda.clone().detach().requires_grad_(True)
    geom_nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    geom_nrm_ref = geom_nrm_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.prepare_shading_normal(pos_ref, view_pos_ref, perturbed_nrm_ref, smooth_nrm_ref, smooth_tng_ref, geom_nrm_ref, True, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.prepare_shading_normal(pos_cuda, view_pos_cuda, perturbed_nrm_cuda, smooth_nrm_cuda, smooth_tng_cuda, geom_nrm_cuda, True)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" bent normal")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("pos:", pos_ref.grad, pos_cuda.grad)
    relative_loss("view_pos:", view_pos_ref.grad, view_pos_cuda.grad)
    relative_loss("perturbed_nrm:", perturbed_nrm_ref.grad, perturbed_nrm_cuda.grad)
    relative_loss("smooth_nrm:", smooth_nrm_ref.grad, smooth_nrm_cuda.grad)
    relative_loss("smooth_tng:", smooth_tng_ref.grad, smooth_tng_cuda.grad)
    relative_loss("geom_nrm:", geom_nrm_ref.grad, geom_nrm_cuda.grad)
def test_schlick():
    """Compare ru._fresnel_shlick: CUDA kernel vs PyTorch reference (values + grads)."""
    f0_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    f0_ref = f0_cuda.clone().detach().requires_grad_(True)
    f90_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    f90_ref = f90_cuda.clone().detach().requires_grad_(True)
    # cos(theta) deliberately scaled to [0, 2) to exercise out-of-range inputs.
    cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 2.0
    cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True)
    cosT_ref = cosT_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru._fresnel_shlick(f0_ref, f90_ref, cosT_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru._fresnel_shlick(f0_cuda, f90_cuda, cosT_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Fresnel shlick")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("f0:", f0_ref.grad, f0_cuda.grad)
    relative_loss("f90:", f90_ref.grad, f90_cuda.grad)
    relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad)
def test_ndf_ggx():
    """Compare ru._ndf_ggx: CUDA kernel vs PyTorch reference (values + grads)."""
    alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    alphaSqr_cuda = alphaSqr_cuda.clone().detach().requires_grad_(True)
    alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True)
    # cos(theta) deliberately scaled to [-1, 2) to exercise out-of-range inputs.
    cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1
    cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True)
    cosT_ref = cosT_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru._ndf_ggx(alphaSqr_ref, cosT_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru._ndf_ggx(alphaSqr_cuda, cosT_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Ndf GGX")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad)
    relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad)
def test_lambda_ggx():
    """Compare ru._lambda_ggx: CUDA kernel vs PyTorch reference (values + grads)."""
    alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True)
    # cos(theta) deliberately scaled to [-1, 2) to exercise out-of-range inputs.
    cosT_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True) * 3.0 - 1
    cosT_cuda = cosT_cuda.clone().detach().requires_grad_(True)
    cosT_ref = cosT_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru._lambda_ggx(alphaSqr_ref, cosT_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru._lambda_ggx(alphaSqr_cuda, cosT_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Lambda GGX")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad)
    relative_loss("cosT:", cosT_ref.grad, cosT_cuda.grad)
def test_masking_smith():
    """Compare ru._masking_smith: CUDA kernel vs PyTorch reference (values + grads)."""
    alphaSqr_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    alphaSqr_ref = alphaSqr_cuda.clone().detach().requires_grad_(True)
    cosThetaI_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    cosThetaI_ref = cosThetaI_cuda.clone().detach().requires_grad_(True)
    cosThetaO_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    cosThetaO_ref = cosThetaO_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru._masking_smith(alphaSqr_ref, cosThetaI_ref, cosThetaO_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru._masking_smith(alphaSqr_cuda, cosThetaI_cuda, cosThetaO_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Smith masking term")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("alpha:", alphaSqr_ref.grad, alphaSqr_cuda.grad)
    relative_loss("cosThetaI:", cosThetaI_ref.grad, cosThetaI_cuda.grad)
    relative_loss("cosThetaO:", cosThetaO_ref.grad, cosThetaO_cuda.grad)
def test_lambert():
    """Compare ru.lambert: CUDA kernel vs PyTorch reference (values + grads)."""
    normals_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    normals_ref = normals_cuda.clone().detach().requires_grad_(True)
    wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    wi_ref = wi_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.lambert(normals_ref, wi_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.lambert(normals_cuda, wi_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Lambert")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("nrm:", normals_ref.grad, normals_cuda.grad)
    relative_loss("wi:", wi_ref.grad, wi_cuda.grad)
def test_frostbite():
    """Compare ru.frostbite_diffuse: CUDA kernel vs PyTorch reference (values + grads)."""
    normals_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    normals_ref = normals_cuda.clone().detach().requires_grad_(True)
    wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    wi_ref = wi_cuda.clone().detach().requires_grad_(True)
    wo_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    wo_ref = wo_cuda.clone().detach().requires_grad_(True)
    rough_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    rough_ref = rough_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.frostbite_diffuse(normals_ref, wi_ref, wo_ref, rough_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.frostbite_diffuse(normals_cuda, wi_cuda, wo_cuda, rough_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Frostbite")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    relative_loss("nrm:", normals_ref.grad, normals_cuda.grad)
    relative_loss("wo:", wo_ref.grad, wo_cuda.grad)
    relative_loss("wi:", wi_ref.grad, wi_cuda.grad)
    relative_loss("rough:", rough_ref.grad, rough_cuda.grad)
def test_pbr_specular():
    """Compare ru.pbr_specular: CUDA kernel vs PyTorch reference (values + grads)."""
    col_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    col_ref = col_cuda.clone().detach().requires_grad_(True)
    nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    nrm_ref = nrm_cuda.clone().detach().requires_grad_(True)
    wi_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    wi_ref = wi_cuda.clone().detach().requires_grad_(True)
    wo_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    wo_ref = wo_cuda.clone().detach().requires_grad_(True)
    alpha_cuda = torch.rand(1, RES, RES, 1, dtype=DTYPE, device='cuda', requires_grad=True)
    alpha_ref = alpha_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.pbr_specular(col_ref, nrm_ref, wo_ref, wi_ref, alpha_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.pbr_specular(col_cuda, nrm_cuda, wo_cuda, wi_cuda, alpha_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Pbr specular")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    # Gradients may legitimately be None for inputs the loss does not reach.
    if col_ref.grad is not None:
        relative_loss("col:", col_ref.grad, col_cuda.grad)
    if nrm_ref.grad is not None:
        relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad)
    if wi_ref.grad is not None:
        relative_loss("wi:", wi_ref.grad, wi_cuda.grad)
    if wo_ref.grad is not None:
        relative_loss("wo:", wo_ref.grad, wo_cuda.grad)
    if alpha_ref.grad is not None:
        relative_loss("alpha:", alpha_ref.grad, alpha_cuda.grad)
def test_pbr_bsdf(bsdf):
    """Compare ru.pbr_bsdf: CUDA kernel vs PyTorch reference (values + grads).

    Args:
        bsdf: Diffuse model name forwarded to ru.pbr_bsdf ('lambert' or 'frostbite').
    """
    kd_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    kd_ref = kd_cuda.clone().detach().requires_grad_(True)
    arm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    arm_ref = arm_cuda.clone().detach().requires_grad_(True)
    pos_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    pos_ref = pos_cuda.clone().detach().requires_grad_(True)
    nrm_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    nrm_ref = nrm_cuda.clone().detach().requires_grad_(True)
    view_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    view_ref = view_cuda.clone().detach().requires_grad_(True)
    light_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    light_ref = light_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True, bsdf=bsdf)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda, bsdf=bsdf)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Pbr BSDF")
    print("-------------------------------------------------------------")
    relative_loss("res:", ref, cuda)
    # Gradients may legitimately be None for inputs the loss does not reach.
    if kd_ref.grad is not None:
        relative_loss("kd:", kd_ref.grad, kd_cuda.grad)
    if arm_ref.grad is not None:
        relative_loss("arm:", arm_ref.grad, arm_cuda.grad)
    if pos_ref.grad is not None:
        relative_loss("pos:", pos_ref.grad, pos_cuda.grad)
    if nrm_ref.grad is not None:
        relative_loss("nrm:", nrm_ref.grad, nrm_cuda.grad)
    if view_ref.grad is not None:
        relative_loss("view:", view_ref.grad, view_cuda.grad)
    if light_ref.grad is not None:
        relative_loss("light:", light_ref.grad, light_cuda.grad)
# Run the full CUDA-vs-reference comparison suite (executes when the script runs).
test_normal()
test_schlick()
test_ndf_ggx()
test_lambda_ggx()
test_masking_smith()
test_lambert()
test_frostbite()
test_pbr_specular()
test_pbr_bsdf('lambert')
test_pbr_bsdf('frostbite')

View File

@@ -0,0 +1,47 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import os
import sys
sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru
RES = 4
DTYPE = torch.float32
def relative_loss(name, ref, cuda):
    """Print the maximum elementwise relative difference between two tensors."""
    ref, cuda = ref.float(), cuda.float()
    max_rel = (torch.abs(ref - cuda) / torch.abs(ref + 1e-7)).max()
    print(name, max_rel.item())
def test_cubemap():
    """Compare ru.filter_cubemap: CUDA path vs PyTorch reference (values + grads).

    NOTE(review): relies on ru.filter_cubemap existing in renderutils — confirm
    against the current renderutils public API.
    """
    cubemap_cuda = torch.rand(6, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    cubemap_ref = cubemap_cuda.clone().detach().requires_grad_(True)
    weights = torch.rand(3, 3, 1, dtype=DTYPE, device='cuda')
    target = torch.rand(6, RES, RES, 3, dtype=DTYPE, device='cuda')

    # Reference (python) path.
    ref = ru.filter_cubemap(cubemap_ref, weights, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref, target)
    ref_loss.backward()

    # CUDA kernel path.
    cuda = ru.filter_cubemap(cubemap_cuda, weights, use_python=False)
    cuda_loss = torch.nn.MSELoss()(cuda, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Cubemap:")
    print("-------------------------------------------------------------")
    relative_loss("flt:", ref, cuda)
    relative_loss("cubemap:", cubemap_ref.grad, cubemap_cuda.grad)

# Run the test when the script executes.
test_cubemap()

View File

@@ -0,0 +1,61 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import os
import sys

sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

RES = 8
DTYPE = torch.float32

def tonemap_srgb(f):
    """sRGB transfer curve: linear below 0.0031308, gamma 1/2.4 above."""
    return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)

def l1(output, target):
    """Reference tone-mapped L1 loss (log + sRGB), mirrors the 'log_srgb' tonemapper."""
    x = torch.clamp(output, min=0, max=65535)
    r = torch.clamp(target, min=0, max=65535)
    x = tonemap_srgb(torch.log(x + 1))
    r = tonemap_srgb(torch.log(r + 1))
    return torch.nn.functional.l1_loss(x, r)

def relative_loss(name, ref, cuda):
    """Print the worst-case relative difference between reference and CUDA results."""
    ref = ref.float()
    cuda = cuda.float()
    # BUGFIX: the epsilon belongs outside abs(); the old abs(ref + 1e-7) is
    # exactly zero when ref == -1e-7, so a divide-by-zero was still possible.
    print(name, torch.max(torch.abs(ref - cuda) / (torch.abs(ref) + 1e-7)).item())

def test_loss(loss, tonemapper):
    """Compare CUDA image_loss against the Python reference for one loss/tonemapper combo."""
    img_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    img_ref = img_cuda.clone().detach().requires_grad_(True)
    target_cuda = torch.rand(1, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    target_ref = target_cuda.clone().detach().requires_grad_(True)

    ref_loss = ru.image_loss(img_ref, target_ref, loss=loss, tonemapper=tonemapper, use_python=True)
    ref_loss.backward()

    cuda_loss = ru.image_loss(img_cuda, target_cuda, loss=loss, tonemapper=tonemapper)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    print(" Loss: %s, %s" % (loss, tonemapper))
    print("-------------------------------------------------------------")
    relative_loss("res:", ref_loss, cuda_loss)
    relative_loss("img:", img_ref.grad, img_cuda.grad)
    relative_loss("target:", target_ref.grad, target_cuda.grad)

test_loss('l1', 'none')
test_loss('l1', 'log_srgb')
test_loss('mse', 'log_srgb')
test_loss('smape', 'none')
test_loss('relmse', 'none')
test_loss('mse', 'none')

View File

@@ -0,0 +1,90 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import os
import sys

sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

BATCH = 8
RES = 1024
DTYPE = torch.float32

torch.manual_seed(0)

def tonemap_srgb(f):
    """sRGB transfer curve: linear below 0.0031308, gamma 1/2.4 above."""
    return torch.where(f > 0.0031308, torch.pow(torch.clamp(f, min=0.0031308), 1.0/2.4)*1.055 - 0.055, 12.92*f)

def l1(output, target):
    """Reference tone-mapped L1 loss (log + sRGB)."""
    x = torch.clamp(output, min=0, max=65535)
    r = torch.clamp(target, min=0, max=65535)
    x = tonemap_srgb(torch.log(x + 1))
    r = tonemap_srgb(torch.log(r + 1))
    return torch.nn.functional.l1_loss(x, r)

def relative_loss(name, ref, cuda):
    """Print the worst-case relative difference between reference and CUDA results."""
    ref = ref.float()
    cuda = cuda.float()
    # BUGFIX: this variant had no epsilon at all, so any exactly-zero reference
    # element produced a divide-by-zero. Epsilon added outside abs(), matching
    # the sibling test scripts.
    print(name, torch.max(torch.abs(ref - cuda) / (torch.abs(ref) + 1e-7)).item())

def test_xfm_points():
    """Compare CUDA point transform against the Python reference (values + gradients)."""
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    points_ref = points_cuda.clone().detach().requires_grad_(True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    ref_out = ru.xfm_points(points_ref, mtx_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref_out, target)
    ref_loss.backward()

    cuda_out = ru.xfm_points(points_cuda, mtx_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda_out, target)
    cuda_loss.backward()

    print("-------------------------------------------------------------")
    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)

def test_xfm_vectors():
    """Compare CUDA vector transform against the Python reference (values + gradients)."""
    points_cuda = torch.rand(1, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    points_ref = points_cuda.clone().detach().requires_grad_(True)
    points_cuda_p = points_cuda.clone().detach().requires_grad_(True)
    points_ref_p = points_cuda.clone().detach().requires_grad_(True)
    mtx_cuda = torch.rand(BATCH, 4, 4, dtype=DTYPE, device='cuda', requires_grad=False)
    mtx_ref = mtx_cuda.clone().detach().requires_grad_(True)
    target = torch.rand(BATCH, RES, 4, dtype=DTYPE, device='cuda', requires_grad=True)

    # Vector transform: only the 3D (direction) part of the target is compared.
    ref_out = ru.xfm_vectors(points_ref.contiguous(), mtx_ref, use_python=True)
    ref_loss = torch.nn.MSELoss()(ref_out, target[..., 0:3])
    ref_loss.backward()

    cuda_out = ru.xfm_vectors(points_cuda.contiguous(), mtx_cuda)
    cuda_loss = torch.nn.MSELoss()(cuda_out, target[..., 0:3])
    cuda_loss.backward()

    # Point transform on the same data, for the homogeneous path.
    ref_out_p = ru.xfm_points(points_ref_p.contiguous(), mtx_ref, use_python=True)
    ref_loss_p = torch.nn.MSELoss()(ref_out_p, target)
    ref_loss_p.backward()

    cuda_out_p = ru.xfm_points(points_cuda_p.contiguous(), mtx_cuda)
    cuda_loss_p = torch.nn.MSELoss()(cuda_out_p, target)
    cuda_loss_p.backward()

    print("-------------------------------------------------------------")
    relative_loss("res:", ref_out, cuda_out)
    relative_loss("points:", points_ref.grad, points_cuda.grad)
    relative_loss("points_p:", points_ref_p.grad, points_cuda_p.grad)

test_xfm_points()
test_xfm_vectors()

View File

@@ -0,0 +1,57 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import torch
import os
import sys

sys.path.insert(0, os.path.join(sys.path[0], '../..'))
import renderutils as ru

DTYPE = torch.float32

def test_bsdf(BATCH, RES, ITR):
    """Benchmark the PBR BSDF: Python reference vs. CUDA kernel, ITR iterations each."""
    kd_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    kd_ref = kd_cuda.clone().detach().requires_grad_(True)
    arm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    arm_ref = arm_cuda.clone().detach().requires_grad_(True)
    pos_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    pos_ref = pos_cuda.clone().detach().requires_grad_(True)
    nrm_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    nrm_ref = nrm_cuda.clone().detach().requires_grad_(True)
    view_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    view_ref = view_cuda.clone().detach().requires_grad_(True)
    light_cuda = torch.rand(BATCH, RES, RES, 3, dtype=DTYPE, device='cuda', requires_grad=True)
    light_ref = light_cuda.clone().detach().requires_grad_(True)
    # BUGFIX: removed the unused `target` tensor; it was never read and wasted
    # up to ~50 MB of GPU memory per call (16 x 2048^2 x 3 floats).

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Warmup call so kernel compilation/caching does not pollute the timings.
    ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)

    print("--- Testing: [%d, %d, %d] ---" % (BATCH, RES, RES))

    start.record()
    for i in range(ITR):
        ref = ru.pbr_bsdf(kd_ref, arm_ref, pos_ref, nrm_ref, view_ref, light_ref, use_python=True)
    end.record()
    torch.cuda.synchronize()
    print("Pbr BSDF python:", start.elapsed_time(end))

    start.record()
    for i in range(ITR):
        cuda = ru.pbr_bsdf(kd_cuda, arm_cuda, pos_cuda, nrm_cuda, view_cuda, light_cuda)
    end.record()
    torch.cuda.synchronize()
    print("Pbr BSDF cuda:", start.elapsed_time(end))

test_bsdf(1, 512, 1000)
test_bsdf(16, 512, 1000)
test_bsdf(1, 2048, 1000)

186
render/texture.py Normal file
View File

@@ -0,0 +1,186 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import numpy as np
import torch
import nvdiffrast.torch as dr
from . import util
######################################################################################
# Smooth pooling / mip computation with linear gradient upscaling
######################################################################################
class texture2d_mip(torch.autograd.Function):
    """One mip-reduction step: 2x2 average pool forward, bilinear upscale backward."""
    @staticmethod
    def forward(ctx, texture):
        # Downsample one mip level: NHWC 2x2 average pooling.
        return util.avg_pool_nhwc(texture, (2,2))

    @staticmethod
    def backward(ctx, dout):
        # Upscale the incoming gradient 2x with a bilinear texture fetch; the
        # 0.25 factor spreads each pooled gradient over its 4 source texels.
        # gy/gx sample texel centers of the 2x-larger grid in [0, 1].
        # NOTE(review): the grid is built on 'cuda' unconditionally, so this
        # assumes CUDA-resident textures — confirm before using on CPU.
        gy, gx = torch.meshgrid(torch.linspace(0.0 + 0.25 / dout.shape[1], 1.0 - 0.25 / dout.shape[1], dout.shape[1]*2, device="cuda"),
                                torch.linspace(0.0 + 0.25 / dout.shape[2], 1.0 - 0.25 / dout.shape[2], dout.shape[2]*2, device="cuda"),
                                ) # indexing='ij')
        uv = torch.stack((gx, gy), dim=-1)
        return dr.texture(dout * 0.25, uv[None, ...].contiguous(), filter_mode='linear', boundary_mode='clamp')
########################################################################################################
# Simple texture class. A texture can be either
# - A 3D tensor (using auto mipmaps)
# - A list of 3D tensors (full custom mip hierarchy)
########################################################################################################
class Texture2D(torch.nn.Module):
    """Simple 2D texture.

    The payload is either a single NHWC tensor (mipmaps generated
    automatically when sampling) or a list of NHWC tensors forming a full
    custom mip hierarchy.
    """
    # Initializes a texture from image data.
    # Input can be constant value (1D array) or texture (3D array) or mip hierarchy (list of 3d arrays)
    def __init__(self, init, min_max=None):
        super(Texture2D, self).__init__()

        if isinstance(init, np.ndarray):
            init = torch.tensor(init, dtype=torch.float32, device='cuda')
        elif isinstance(init, list) and len(init) == 1:
            init = init[0]  # A single-level "hierarchy" is just a texture

        if isinstance(init, list):
            self.data = list(torch.nn.Parameter(mip.clone().detach(), requires_grad=True) for mip in init)
        elif len(init.shape) == 4:
            self.data = torch.nn.Parameter(init.clone().detach(), requires_grad=True)
        elif len(init.shape) == 3:
            self.data = torch.nn.Parameter(init[None, ...].clone().detach(), requires_grad=True)
        elif len(init.shape) == 1:
            self.data = torch.nn.Parameter(init[None, None, None, :].clone().detach(), requires_grad=True) # Convert constant to 1x1 tensor
        else:
            assert False, "Invalid texture object"

        # Optional per-channel (min, max) clamp range applied by clamp_().
        self.min_max = min_max

    # Filtered (trilinear) sample texture at a given location
    def sample(self, texc, texc_deriv, filter_mode='linear-mipmap-linear'):
        if isinstance(self.data, list):
            out = dr.texture(self.data[0], texc, texc_deriv, mip=self.data[1:], filter_mode=filter_mode)
        else:
            if self.data.shape[1] > 1 and self.data.shape[2] > 1:
                # Build the mip pyramid on the fly with differentiable pooling.
                mips = [self.data]
                while mips[-1].shape[1] > 1 and mips[-1].shape[2] > 1:
                    mips += [texture2d_mip.apply(mips[-1])]
                out = dr.texture(mips[0], texc, texc_deriv, mip=mips[1:], filter_mode=filter_mode)
            else:
                out = dr.texture(self.data, texc, texc_deriv, filter_mode=filter_mode)
        return out

    def getRes(self):
        """(height, width) of the finest mip level."""
        return self.getMips()[0].shape[1:3]

    def getChannels(self):
        """Number of texture channels."""
        return self.getMips()[0].shape[3]

    def getMips(self):
        """List of mip tensors (length 1 when auto-mipmapping)."""
        return self.data if isinstance(self.data, list) else [self.data]

    # In-place clamp with no derivative to make sure values are in valid range after training
    def clamp_(self):
        if self.min_max is not None:
            for mip in self.getMips():
                for i in range(mip.shape[-1]):
                    mip[..., i].clamp_(min=self.min_max[0][i], max=self.min_max[1][i])

    # In-place normalize to keep vector-valued texels at unit length after training
    def normalize_(self):
        with torch.no_grad():
            for mip in self.getMips():
                # BUGFIX: the original did `mip = util.safe_normalize(mip)`,
                # which only rebinds the loop variable and never writes the
                # result back — the method was a silent no-op. copy_() performs
                # the normalization in place as intended.
                mip.copy_(util.safe_normalize(mip))
########################################################################################################
# Helper function to create a trainable texture from a regular texture. The trainable weights are
# initialized with texture data as an initial guess
########################################################################################################
def create_trainable(init, res=None, auto_mipmaps=True, min_max=None):
    """Build a trainable Texture2D initialized from `init`.

    `init` may be a Texture2D, a numpy array, a torch tensor, or a 1D
    constant. When auto_mipmaps is False an explicit mip chain is generated
    down to 1x1; otherwise mips are derived automatically at sample time.
    """
    with torch.no_grad():
        if isinstance(init, Texture2D):
            assert isinstance(init.data, torch.Tensor)
            if min_max is None:
                min_max = init.min_max
            init = init.data
        elif isinstance(init, np.ndarray):
            init = torch.tensor(init, dtype=torch.float32, device='cuda')

        # Promote to NHWC: constants become 1x1 textures, HWC gets a batch dim.
        if len(init.shape) == 1:
            init = init[None, None, None, :]
        elif len(init.shape) == 3:
            init = init[None, ...]

        # Rescale to the requested resolution, if any.
        if res is not None:
            init = util.scale_img_nhwc(init, res)

        if auto_mipmaps:
            return Texture2D(init, min_max=min_max)

        # Generate an explicit mip chain by repeated 2x reduction down to 1x1.
        chain = [init.clone().detach().requires_grad_(True)]
        while chain[-1].shape[1] > 1 or chain[-1].shape[2] > 1:
            next_res = [max(chain[-1].shape[1] // 2, 1), max(chain[-1].shape[2] // 2, 1)]
            chain += [util.scale_img_nhwc(chain[-1], next_res)]
        return Texture2D(chain, min_max=min_max)
########################################################################################################
# Convert texture to and from SRGB
########################################################################################################
def srgb_to_rgb(texture):
    """Return a new Texture2D with every mip converted from sRGB to linear."""
    return Texture2D([util.srgb_to_rgb(mip) for mip in texture.getMips()])

def rgb_to_srgb(texture):
    """Return a new Texture2D with every mip converted from linear to sRGB."""
    return Texture2D([util.rgb_to_srgb(mip) for mip in texture.getMips()])
########################################################################################################
# Utility functions for loading / storing a texture
########################################################################################################
def _load_mip2D(fn, lambda_fn=None, channels=None):
    """Load one mip level from disk as a float32 CUDA tensor, optionally
    trimming to `channels` and applying `lambda_fn`."""
    imgdata = torch.tensor(util.load_image(fn), dtype=torch.float32, device='cuda')
    if channels is not None:
        imgdata = imgdata[..., 0:channels]
    if lambda_fn is not None:
        imgdata = lambda_fn(imgdata)
    return imgdata.detach().clone()

def load_texture2D(fn, lambda_fn=None, channels=None):
    """Load a texture; files named base_0.ext, base_1.ext, ... form a mip chain."""
    base, ext = os.path.splitext(fn)
    if not os.path.exists(base + "_0" + ext):
        # No explicit mip chain on disk — plain single-level texture.
        return Texture2D(_load_mip2D(fn, lambda_fn, channels))
    mips = []
    while os.path.exists(base + ("_%d" % len(mips)) + ext):
        mips += [_load_mip2D(base + ("_%d" % len(mips)) + ext, lambda_fn, channels)]
    return Texture2D(mips)
def _save_mip2D(fn, mip, mipidx, lambda_fn):
    """Write one mip level; mipidx (if given) is appended to the file name."""
    transformed = lambda_fn(mip) if lambda_fn is not None else mip
    data = transformed.detach().cpu().numpy()
    if mipidx is None:
        util.save_image(fn, data)
    else:
        base, ext = os.path.splitext(fn)
        util.save_image(base + ("_%d" % mipidx) + ext, data)

def save_texture2D(fn, tex, lambda_fn=None):
    """Save a Texture2D; custom mip chains are written as base_0, base_1, ..."""
    if isinstance(tex.data, list):
        for i, mip in enumerate(tex.data):
            _save_mip2D(fn, mip[0,...], i, lambda_fn)
    else:
        _save_mip2D(fn, tex.data[0,...], None, lambda_fn)

465
render/util.py Normal file
View File

@@ -0,0 +1,465 @@
# Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
import os
import numpy as np
import torch
import nvdiffrast.torch as dr
import imageio
#----------------------------------------------------------------------------
# Vector operations
#----------------------------------------------------------------------------
def dot(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """Batched dot product over the last axis, keeping the reduced dimension."""
    return (x * y).sum(dim=-1, keepdim=True)

def reflect(x: torch.Tensor, n: torch.Tensor) -> torch.Tensor:
    """Mirror x about the normal n (n assumed unit length)."""
    return dot(x, n) * n * 2 - x

def length(x: torch.Tensor, eps: float = 1e-20) -> torch.Tensor:
    """Euclidean norm over the last axis; clamped because grad(sqrt(0)) = NaN."""
    return torch.clamp(dot(x, x), min=eps).sqrt()

def safe_normalize(x: torch.Tensor, eps: float = 1e-20) -> torch.Tensor:
    """Normalize x along the last axis, robust to zero-length vectors."""
    return x / length(x, eps)

def to_hvec(x: torch.Tensor, w: float) -> torch.Tensor:
    """Append a homogeneous coordinate w to the last axis."""
    return torch.nn.functional.pad(x, pad=(0, 1), mode='constant', value=w)
#----------------------------------------------------------------------------
# sRGB color transforms
#----------------------------------------------------------------------------
def _rgb_to_srgb(f: torch.Tensor) -> torch.Tensor:
    """Linear -> sRGB transfer on a 3-channel tensor."""
    linear_part = f * 12.92
    gamma_part = torch.pow(torch.clamp(f, 0.0031308), 1.0 / 2.4) * 1.055 - 0.055
    return torch.where(f <= 0.0031308, linear_part, gamma_part)

def rgb_to_srgb(f: torch.Tensor) -> torch.Tensor:
    """Convert linear RGB(A) to sRGB; an alpha channel passes through untouched."""
    assert f.shape[-1] == 3 or f.shape[-1] == 4
    if f.shape[-1] == 4:
        out = torch.cat((_rgb_to_srgb(f[..., 0:3]), f[..., 3:4]), dim=-1)
    else:
        out = _rgb_to_srgb(f)
    assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2]
    return out

def _srgb_to_rgb(f: torch.Tensor) -> torch.Tensor:
    """sRGB -> linear transfer on a 3-channel tensor."""
    linear_part = f / 12.92
    gamma_part = torch.pow((torch.clamp(f, 0.04045) + 0.055) / 1.055, 2.4)
    return torch.where(f <= 0.04045, linear_part, gamma_part)

def srgb_to_rgb(f: torch.Tensor) -> torch.Tensor:
    """Convert sRGB(A) to linear RGB; an alpha channel passes through untouched."""
    assert f.shape[-1] == 3 or f.shape[-1] == 4
    if f.shape[-1] == 4:
        out = torch.cat((_srgb_to_rgb(f[..., 0:3]), f[..., 3:4]), dim=-1)
    else:
        out = _srgb_to_rgb(f)
    assert out.shape[0] == f.shape[0] and out.shape[1] == f.shape[1] and out.shape[2] == f.shape[2]
    return out

def reinhard(f: torch.Tensor) -> torch.Tensor:
    """Reinhard tone map: f / (1 + f)."""
    return f / (f + 1)
#-----------------------------------------------------------------------------------
# Metrics (taken from jaxNerf source code, in order to replicate their measurements)
#
# https://github.com/google-research/google-research/blob/301451a62102b046bbeebff49a760ebeec9707b8/jaxnerf/nerf/utils.py#L266
#
#-----------------------------------------------------------------------------------
def mse_to_psnr(mse):
    """Compute PSNR given an MSE (we assume the maximum pixel value is 1)."""
    return np.log(mse) * (-10. / np.log(10.))

def psnr_to_mse(psnr):
    """Compute MSE given a PSNR (we assume the maximum pixel value is 1)."""
    return np.exp(psnr * (-0.1 * np.log(10.)))
#----------------------------------------------------------------------------
# Displacement texture lookup
#----------------------------------------------------------------------------
def get_miplevels(texture: np.ndarray) -> float:
    """Number of mip levels for a texture: floor(log2(min(H, W)))."""
    smallest = min(texture.shape[0], texture.shape[1])
    return np.floor(np.log2(smallest))

def tex_2d(tex_map : torch.Tensor, coords : torch.Tensor, filter='nearest') -> torch.Tensor:
    """Sample an HWC texture at normalized [0, 1] coordinates via grid_sample."""
    nchw = tex_map[None, ...].permute(0, 3, 1, 2)  # HWC -> NCHW with batch dim
    grid = coords[None, None, ...] * 2 - 1         # remap [0, 1] -> [-1, 1]
    sampled = torch.nn.functional.grid_sample(nchw, grid, mode=filter, align_corners=False)
    return sampled.permute(0, 2, 3, 1)[0, 0, ...]  # NCHW -> NHWC, drop batch dims
#----------------------------------------------------------------------------
# Cubemap utility functions
#----------------------------------------------------------------------------
def cube_to_dir(s, x, y):
    """Map face-local coordinates (x, y) on cube face s (0..5) to a 3D direction."""
    one = torch.ones_like(x)
    if s == 0:   rx, ry, rz = one, -y, -x    # +X face
    elif s == 1: rx, ry, rz = -one, -y, x    # -X face
    elif s == 2: rx, ry, rz = x, one, y      # +Y face
    elif s == 3: rx, ry, rz = x, -one, -y    # -Y face
    elif s == 4: rx, ry, rz = x, -y, one     # +Z face
    elif s == 5: rx, ry, rz = -x, -y, -one   # -Z face
    return torch.stack((rx, ry, rz), dim=-1)
def latlong_to_cubemap(latlong_map, res):
    """Resample an equirectangular (lat-long) map into a [6, H, W, C] cubemap.

    latlong_map: [H, W, C] tensor; res: (height, width) of each cube face.
    NOTE(review): buffers are created on 'cuda' unconditionally — inputs are
    assumed to live on the GPU.
    """
    cubemap = torch.zeros(6, res[0], res[1], latlong_map.shape[-1], dtype=torch.float32, device='cuda')
    for s in range(6):
        # Per-texel direction for face s; gy/gx sample texel centers in [-1, 1].
        gy, gx = torch.meshgrid(torch.linspace(-1.0 + 1.0 / res[0], 1.0 - 1.0 / res[0], res[0], device='cuda'),
                                torch.linspace(-1.0 + 1.0 / res[1], 1.0 - 1.0 / res[1], res[1], device='cuda'),
                                ) # indexing='ij')
        v = safe_normalize(cube_to_dir(s, gx, gy))
        # Direction -> lat-long texture coordinates: tu from azimuth, tv from polar angle.
        tu = torch.atan2(v[..., 0:1], -v[..., 2:3]) / (2 * np.pi) + 0.5
        tv = torch.acos(torch.clamp(v[..., 1:2], min=-1, max=1)) / np.pi
        texcoord = torch.cat((tu, tv), dim=-1)

        cubemap[s, ...] = dr.texture(latlong_map[None, ...], texcoord[None, ...], filter_mode='linear')[0]
    return cubemap
def cubemap_to_latlong(cubemap, res):
    """Resample a [6, H, W, C] cubemap into an equirectangular map of size res=(H, W).

    NOTE(review): the sample grid is created on 'cuda' unconditionally — the
    cubemap is assumed to live on the GPU.
    """
    # gy spans the polar angle (scaled by pi), gx the azimuth, at texel centers.
    gy, gx = torch.meshgrid(torch.linspace( 0.0 + 1.0 / res[0], 1.0 - 1.0 / res[0], res[0], device='cuda'),
                            torch.linspace(-1.0 + 1.0 / res[1], 1.0 - 1.0 / res[1], res[1], device='cuda'),
                            ) # indexing='ij')

    sintheta, costheta = torch.sin(gy*np.pi), torch.cos(gy*np.pi)
    sinphi, cosphi = torch.sin(gx*np.pi), torch.cos(gx*np.pi)
    # Spherical angles -> 3D lookup direction (polar axis along +Y).
    reflvec = torch.stack((
        sintheta*sinphi,
        costheta,
        -sintheta*cosphi
        ), dim=-1)
    # 'cube' boundary mode handles face selection and seam filtering.
    return dr.texture(cubemap[None, ...], reflvec[None, ...].contiguous(), filter_mode='linear', boundary_mode='cube')[0]
#----------------------------------------------------------------------------
# Image scaling
#----------------------------------------------------------------------------
def scale_img_hwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
    """Resize a single HWC image by temporarily adding a batch dimension."""
    return scale_img_nhwc(x[None, ...], size, mag, min)[0]

def scale_img_nhwc(x : torch.Tensor, size, mag='bilinear', min='area') -> torch.Tensor:
    """Resize an NHWC batch to size=(H, W): `min` mode when shrinking,
    `mag` mode when enlarging. Mixed magnify/minify is not supported."""
    shrinking = x.shape[1] >= size[0] and x.shape[2] >= size[1]
    growing = x.shape[1] < size[0] and x.shape[2] < size[1]
    assert shrinking or growing, "Trying to magnify image in one dimension and minify in the other"
    y = x.permute(0, 3, 1, 2)  # NHWC -> NCHW for interpolate
    if x.shape[1] > size[0] and x.shape[2] > size[1]:
        # Minification — previous size was strictly bigger.
        y = torch.nn.functional.interpolate(y, size, mode=min)
    elif mag in ('bilinear', 'bicubic'):
        y = torch.nn.functional.interpolate(y, size, mode=mag, align_corners=True)
    else:
        y = torch.nn.functional.interpolate(y, size, mode=mag)
    return y.permute(0, 2, 3, 1).contiguous()  # NCHW -> NHWC

def avg_pool_nhwc(x : torch.Tensor, size) -> torch.Tensor:
    """2D average pooling on an NHWC tensor."""
    nchw = x.permute(0, 3, 1, 2)
    pooled = torch.nn.functional.avg_pool2d(nchw, size)
    return pooled.permute(0, 2, 3, 1).contiguous()
#----------------------------------------------------------------------------
# Behaves similar to tf.segment_sum
#----------------------------------------------------------------------------
def segment_sum(data: torch.Tensor, segment_ids: torch.Tensor) -> torch.Tensor:
    """Sum rows of `data` sharing a segment id (behaves similar to tf.segment_sum).

    segment_ids is either 1-D (one id per leading row) or already expanded to
    data.shape. Output row i holds the sum of all rows with id i.
    Fix: buffers now follow data's device and dtype instead of hard-coded
    'cuda'/float32, so the function also works on CPU tensors and other float
    dtypes (scatter_add requires matching dtypes anyway).
    """
    num_segments = torch.unique_consecutive(segment_ids).shape[0]

    # Repeat ids until they match data's full shape.
    if len(segment_ids.shape) == 1:
        s = int(torch.prod(torch.tensor(data.shape[1:], dtype=torch.int64)))
        segment_ids = segment_ids.repeat_interleave(s).view(segment_ids.shape[0], *data.shape[1:])

    assert data.shape == segment_ids.shape, "data.shape and segment_ids.shape should be equal"

    shape = [num_segments] + list(data.shape[1:])
    result = torch.zeros(*shape, dtype=data.dtype, device=data.device)
    return result.scatter_add(0, segment_ids, data)
#----------------------------------------------------------------------------
# Matrix helpers.
#----------------------------------------------------------------------------
def fovx_to_fovy(fovx, aspect):
return np.arctan(np.tan(fovx / 2) / aspect) * 2.0
def focal_length_to_fovy(focal_length, sensor_height):
return 2 * np.arctan(0.5 * sensor_height / focal_length)
# Reworked so this matches gluPerspective / glm::perspective, using fovy
def perspective(fovy=0.7854, aspect=1.0, n=0.1, f=1000.0, device=None):
y = np.tan(fovy / 2)
return torch.tensor([[1/(y*aspect), 0, 0, 0],
[ 0, 1/-y, 0, 0],
[ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)],
[ 0, 0, -1, 0]], dtype=torch.float32, device=device)
# Reworked so this matches gluPerspective / glm::perspective, using fovy
def perspective_offcenter(fovy, fraction, rx, ry, aspect=1.0, n=0.1, f=1000.0, device=None):
y = np.tan(fovy / 2)
# Full frustum
R, L = aspect*y, -aspect*y
T, B = y, -y
# Create a randomized sub-frustum
width = (R-L)*fraction
height = (T-B)*fraction
xstart = (R-L)*rx
ystart = (T-B)*ry
l = L + xstart
r = l + width
b = B + ystart
t = b + height
# https://www.scratchapixel.com/lessons/3d-basic-rendering/perspective-and-orthographic-projection-matrix/opengl-perspective-projection-matrix
return torch.tensor([[2/(r-l), 0, (r+l)/(r-l), 0],
[ 0, -2/(t-b), (t+b)/(t-b), 0],
[ 0, 0, -(f+n)/(f-n), -(2*f*n)/(f-n)],
[ 0, 0, -1, 0]], dtype=torch.float32, device=device)
def translate(x, y, z, device=None):
return torch.tensor([[1, 0, 0, x],
[0, 1, 0, y],
[0, 0, 1, z],
[0, 0, 0, 1]], dtype=torch.float32, device=device)
def rotate_x(a, device=None):
s, c = np.sin(a), np.cos(a)
return torch.tensor([[1, 0, 0, 0],
[0, c, s, 0],
[0, -s, c, 0],
[0, 0, 0, 1]], dtype=torch.float32, device=device)
def rotate_y(a, device=None):
s, c = np.sin(a), np.cos(a)
return torch.tensor([[ c, 0, s, 0],
[ 0, 1, 0, 0],
[-s, 0, c, 0],
[ 0, 0, 0, 1]], dtype=torch.float32, device=device)
def scale(s, device=None):
return torch.tensor([[ s, 0, 0, 0],
[ 0, s, 0, 0],
[ 0, 0, s, 0],
[ 0, 0, 0, 1]], dtype=torch.float32, device=device)
def lookAt(eye, at, up):
    """Right-handed look-at view matrix (rotation composed with translation)."""
    # Camera basis: w points from target toward the eye, u right, v up.
    forward = eye - at
    w = forward / torch.linalg.norm(forward)
    u = torch.cross(up, w)
    u = u / torch.linalg.norm(u)
    v = torch.cross(w, u)
    # Translation moves the eye to the origin.
    t_mat = torch.eye(4, dtype=eye.dtype, device=eye.device)
    t_mat[:3, 3] = -eye
    # Rotation rows are the camera basis vectors.
    r_mat = torch.eye(4, dtype=eye.dtype, device=eye.device)
    r_mat[0, :3], r_mat[1, :3], r_mat[2, :3] = u, v, w
    return r_mat @ t_mat
@torch.no_grad()
def random_rotation_translation(t, device=None):
    """Random rigid transform: orthonormal random rotation + uniform(-t, t) translation."""
    basis = np.random.normal(size=[3, 3])
    # Orthogonalize via cross products, then normalize every row.
    basis[1] = np.cross(basis[0], basis[2])
    basis[2] = np.cross(basis[0], basis[1])
    basis /= np.linalg.norm(basis, axis=1, keepdims=True)
    xform = np.zeros((4, 4))
    xform[:3, :3] = basis
    xform[3, 3] = 1.0
    xform[:3, 3] = np.random.uniform(-t, t, size=[3])
    return torch.tensor(xform, dtype=torch.float32, device=device)

@torch.no_grad()
def random_rotation(device=None):
    """Random orthonormal rotation as a 4x4 matrix with zero translation."""
    basis = np.random.normal(size=[3, 3])
    basis[1] = np.cross(basis[0], basis[2])
    basis[2] = np.cross(basis[0], basis[1])
    basis /= np.linalg.norm(basis, axis=1, keepdims=True)
    xform = np.zeros((4, 4))
    xform[:3, :3] = basis
    xform[3, 3] = 1.0
    return torch.tensor(xform, dtype=torch.float32, device=device)
#----------------------------------------------------------------------------
# Compute focal points of a set of lines using least squares.
# handy for poorly centered datasets
#----------------------------------------------------------------------------
def lines_focal(o, d):
    """Least-squares point closest to a set of lines (origins o, directions d).

    Handy for poorly centered datasets: solves sum_i (d_i d_i^T - I)(p - o_i) = 0.
    """
    d = safe_normalize(d)
    I = torch.eye(3, dtype=o.dtype, device=o.device)
    # (d d^T - I) per line, shared by both the system matrix and the rhs.
    proj = d[..., None] @ torch.transpose(d[..., None], 1, 2) - I[None, ...]
    S = proj.sum(dim=0)
    C = (proj @ o[..., None]).sum(dim=0).squeeze(1)
    return torch.linalg.pinv(S) @ C
#----------------------------------------------------------------------------
# Cosine sample around a vector N
#----------------------------------------------------------------------------
@torch.no_grad()
def cosine_sample(N, size=None):
    """Cosine-weighted hemisphere sample(s) around direction N.

    With size=None a single sample is drawn via numpy's RNG; otherwise a
    [*size, 3] batch is drawn via torch's RNG on N's device.
    """
    # construct local frame
    N = N/torch.linalg.norm(N)

    # Two candidate tangents (N crossed with the x / y axes); pick the longer
    # one for numerical stability.
    dx0 = torch.tensor([0, N[2], -N[1]], dtype=N.dtype, device=N.device)
    dx1 = torch.tensor([-N[2], 0, N[0]], dtype=N.dtype, device=N.device)
    dx = torch.where(dot(dx0, dx0) > dot(dx1, dx1), dx0, dx1)
    #dx = dx0 if np.dot(dx0,dx0) > np.dot(dx1,dx1) else dx1
    dx = dx / torch.linalg.norm(dx)
    dy = torch.cross(N,dx)
    dy = dy / torch.linalg.norm(dy)

    # cosine sampling in local frame
    if size is None:
        phi = 2.0 * np.pi * np.random.uniform()
        s = np.random.uniform()
    else:
        phi = 2.0 * np.pi * torch.rand(*size, 1, dtype=N.dtype, device=N.device)
        s = torch.rand(*size, 1, dtype=N.dtype, device=N.device)
    # cos(theta) = sqrt(u) yields a pdf proportional to cos(theta).
    costheta = np.sqrt(s)
    sintheta = np.sqrt(1.0 - s)

    # cartesian vector in local space
    x = np.cos(phi)*sintheta
    y = np.sin(phi)*sintheta
    z = costheta

    # local to world
    return dx*x + dy*y + N*z
#----------------------------------------------------------------------------
# Bilinear downsample by 2x.
#----------------------------------------------------------------------------
def bilinear_downsample(x : torch.tensor) -> torch.Tensor:
    """2x bilinear downsample of an NHWC tensor using a 4x4 binomial kernel.

    NOTE(review): shadowed by the two-argument bilinear_downsample defined
    later in this module; kept as-is for reference.
    """
    kernel = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0
    kernel = kernel.expand(x.shape[-1], 1, 4, 4)  # one depthwise filter per channel
    nchw = x.permute(0, 3, 1, 2)
    out = torch.nn.functional.conv2d(nchw, kernel, padding=1, stride=2, groups=x.shape[-1])
    return out.permute(0, 2, 3, 1)
#----------------------------------------------------------------------------
# Bilinear downsample log(spp) steps
#----------------------------------------------------------------------------
def bilinear_downsample(x : torch.tensor, spp) -> torch.Tensor:
    """Downsample an NHWC tensor by 2x, log2(spp) times, with replicate padding."""
    kernel = torch.tensor([[1, 3, 3, 1], [3, 9, 9, 3], [3, 9, 9, 3], [1, 3, 3, 1]], dtype=torch.float32, device=x.device) / 64.0
    channels = x.shape[-1]
    kernel = kernel.expand(channels, 1, 4, 4)  # one depthwise filter per channel
    y = x.permute(0, 3, 1, 2)  # NHWC -> NCHW
    for _ in range(int(np.log2(spp))):
        padded = torch.nn.functional.pad(y, (1, 1, 1, 1), mode='replicate')
        y = torch.nn.functional.conv2d(padded, kernel, padding=0, stride=2, groups=channels)
    return y.permute(0, 2, 3, 1).contiguous()  # NCHW -> NHWC
#----------------------------------------------------------------------------
# Singleton initialize GLFW
#----------------------------------------------------------------------------
# Singleton guard: has this module already initialized GLFW?
_glfw_initialized = False
def init_glfw():
    """Lazily initialize GLFW exactly once, for (hidden) window creation."""
    global _glfw_initialized
    try:
        import glfw
        glfw.ERROR_REPORTING = 'raise'  # surface GLFW errors as exceptions
        glfw.default_window_hints()
        glfw.window_hint(glfw.VISIBLE, glfw.FALSE)  # probe window stays hidden
        test = glfw.create_window(8, 8, "Test", None, None) # Create a window and see if not initialized yet
    except glfw.GLFWError as e:
        # A NOT_INITIALIZED error from the probe tells us init is still needed.
        if e.error_code == glfw.NOT_INITIALIZED:
            glfw.init()
            _glfw_initialized = True
#----------------------------------------------------------------------------
# Image display function using OpenGL.
#----------------------------------------------------------------------------
# Cached GLFW window, reused across display_image calls.
_glfw_window = None
def display_image(image, title=None):
    """Show an HWC image (uint8 or float32) in a GLFW/OpenGL debug window.

    Returns False once the user has requested the window to close, else True.
    """
    # Import OpenGL
    import OpenGL.GL as gl
    import glfw

    # Zoom image if requested.
    image = np.asarray(image[..., 0:3]) if image.shape[-1] == 4 else np.asarray(image)  # drop alpha
    height, width, channels = image.shape

    # Initialize window.
    init_glfw()
    if title is None:
        title = 'Debug window'
    global _glfw_window
    if _glfw_window is None:
        glfw.default_window_hints()
        _glfw_window = glfw.create_window(width, height, title, None, None)
        glfw.make_context_current(_glfw_window)
        glfw.show_window(_glfw_window)
        glfw.swap_interval(0)  # disable vsync so display never throttles the caller
    else:
        glfw.make_context_current(_glfw_window)
        glfw.set_window_title(_glfw_window, title)
        glfw.set_window_size(_glfw_window, width, height)

    # Update window.
    glfw.poll_events()
    gl.glClearColor(0, 0, 0, 1)
    gl.glClear(gl.GL_COLOR_BUFFER_BIT)
    gl.glWindowPos2f(0, 0)
    gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)  # rows may not be 4-byte aligned
    gl_format = {3: gl.GL_RGB, 2: gl.GL_RG, 1: gl.GL_LUMINANCE}[channels]
    gl_dtype = {'uint8': gl.GL_UNSIGNED_BYTE, 'float32': gl.GL_FLOAT}[image.dtype.name]
    gl.glDrawPixels(width, height, gl_format, gl_dtype, image[::-1])  # flip rows: GL origin is bottom-left
    glfw.swap_buffers(_glfw_window)
    if glfw.window_should_close(_glfw_window):
        return False
    return True
#----------------------------------------------------------------------------
# Image save/load helper.
#----------------------------------------------------------------------------
def save_image(fn, x : np.ndarray):
    """Save a [0, 1] float image to fn as 8-bit; warns instead of raising on failure."""
    try:
        data = np.clip(np.rint(x * 255.0), 0, 255).astype(np.uint8)
        if os.path.splitext(fn)[1] == ".png":
            imageio.imwrite(fn, data, compress_level=3) # Low compression for faster saving
        else:
            imageio.imwrite(fn, data)
    except Exception:
        # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        print("WARNING: FAILED to save image %s" % fn)
def save_image_raw(fn, x : np.ndarray):
    """Save an image array to fn without any conversion; warns instead of raising."""
    try:
        imageio.imwrite(fn, x)
    except Exception:
        # BUGFIX: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        print("WARNING: FAILED to save image %s" % fn)
def load_image_raw(fn) -> np.ndarray:
    """Read an image file with imageio, returning the raw array (no conversion)."""
    return imageio.imread(fn)

def load_image(fn) -> np.ndarray:
    """Read an image as float32: HDR data passes through, LDR is scaled to [0, 1]."""
    img = load_image_raw(fn)
    if img.dtype != np.float32:  # LDR image
        img = img.astype(np.float32) / 255
    return img
#----------------------------------------------------------------------------
def time_to_text(x):
    """Format a duration in seconds as a short human-readable string."""
    if x > 3600:
        return "%.2f h" % (x / 3600)
    if x > 60:
        return "%.2f m" % (x / 60)
    return "%.2f s" % x
#----------------------------------------------------------------------------
def checkerboard(res, checker_size) -> np.ndarray:
    """RGB checkerboard image of shape (res[0], res[1], 3) with values 0.33/0.66."""
    period = checker_size * 2
    tiles_y = -(-res[0] // period)  # ceil division: cover the full height
    tiles_x = -(-res[1] // period)
    pattern = [[1, 0] * tiles_x, [0, 1] * tiles_x] * tiles_y
    check = np.kron(pattern, np.ones((checker_size, checker_size))) * 0.33 + 0.33
    check = check[:res[0], :res[1]]  # crop to the exact requested resolution
    return np.stack((check, check, check), axis=-1)