crevelop committed on
Commit
5e116f2
1 Parent(s): bc2232a

feat: upload model files and gradio app

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +2 -0
  3. .gitignore +0 -0
  4. app.py +257 -0
  5. assets/example_image/T.png +0 -0
  6. assets/example_image/typical_building_building.png +0 -0
  7. assets/example_image/typical_building_castle.png +0 -0
  8. assets/example_image/typical_building_colorful_cottage.png +0 -0
  9. assets/example_image/typical_building_maya_pyramid.png +0 -0
  10. assets/example_image/typical_building_mushroom.png +0 -0
  11. assets/example_image/typical_building_space_station.png +0 -0
  12. assets/example_image/typical_creature_dragon.png +0 -0
  13. assets/example_image/typical_creature_elephant.png +0 -0
  14. assets/example_image/typical_creature_furry.png +0 -0
  15. assets/example_image/typical_creature_quadruped.png +0 -0
  16. assets/example_image/typical_creature_robot_crab.png +0 -0
  17. assets/example_image/typical_creature_robot_dinosour.png +0 -0
  18. assets/example_image/typical_creature_rock_monster.png +0 -0
  19. assets/example_image/typical_humanoid_block_robot.png +0 -0
  20. assets/example_image/typical_humanoid_dragonborn.png +0 -0
  21. assets/example_image/typical_humanoid_dwarf.png +0 -0
  22. assets/example_image/typical_humanoid_goblin.png +0 -0
  23. assets/example_image/typical_humanoid_mech.png +0 -0
  24. assets/example_image/typical_misc_crate.png +0 -0
  25. assets/example_image/typical_misc_fireplace.png +0 -0
  26. assets/example_image/typical_misc_gate.png +0 -0
  27. assets/example_image/typical_misc_lantern.png +0 -0
  28. assets/example_image/typical_misc_magicbook.png +0 -0
  29. assets/example_image/typical_misc_mailbox.png +0 -0
  30. assets/example_image/typical_misc_monster_chest.png +0 -0
  31. assets/example_image/typical_misc_paper_machine.png +0 -0
  32. assets/example_image/typical_misc_phonograph.png +0 -0
  33. assets/example_image/typical_misc_portal2.png +0 -0
  34. assets/example_image/typical_misc_storage_chest.png +0 -0
  35. assets/example_image/typical_misc_telephone.png +0 -0
  36. assets/example_image/typical_misc_television.png +0 -0
  37. assets/example_image/typical_misc_workbench.png +0 -0
  38. assets/example_image/typical_vehicle_biplane.png +0 -0
  39. assets/example_image/typical_vehicle_bulldozer.png +0 -0
  40. assets/example_image/typical_vehicle_cart.png +0 -0
  41. assets/example_image/typical_vehicle_excavator.png +0 -0
  42. assets/example_image/typical_vehicle_helicopter.png +0 -0
  43. assets/example_image/typical_vehicle_locomotive.png +0 -0
  44. assets/example_image/typical_vehicle_pirate_ship.png +0 -0
  45. assets/example_image/weatherworn_misc_paper_machine3.png +0 -0
  46. extensions/nvdiffrast/LICENSE.txt +97 -0
  47. extensions/nvdiffrast/README.md +42 -0
  48. extensions/nvdiffrast/nvdiffrast/__init__.py +9 -0
  49. extensions/nvdiffrast/nvdiffrast/common/antialias.cu +558 -0
  50. extensions/nvdiffrast/nvdiffrast/common/antialias.h +50 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
37
+ *.whl filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
File without changes
app.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ from gradio_litmodel3d import LitModel3D
4
+
5
+ import os
6
# Must be set before trellis (and thus spconv) is imported below, so spconv
# uses its 'native' algorithm implementation.
os.environ['SPCONV_ALGO'] = 'native'
7
+ from typing import *
8
+ import torch
9
+ import numpy as np
10
+ import imageio
11
+ import uuid
12
+ from easydict import EasyDict as edict
13
+ from PIL import Image
14
+ from trellis.pipelines import TrellisImageTo3DPipeline
15
+ from trellis.representations import Gaussian, MeshExtractResult
16
+ from trellis.utils import render_utils, postprocessing_utils
17
+
18
+
19
# Upper bound for the seed slider and for np.random.randint when randomizing.
MAX_SEED = np.iinfo(np.int32).max
# Scratch directory for per-trial preprocessed images, preview videos and GLB exports.
TMP_DIR = "/tmp/Trellis-demo"

os.makedirs(TMP_DIR, exist_ok=True)
23
+
24
+
25
def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
    """
    Preprocess the input image and cache the result under a fresh trial id.

    Args:
        image (Image.Image): The input image.

    Returns:
        str: uuid of the trial.
        Image.Image: The preprocessed image.
    """
    new_trial = uuid.uuid4()
    result = pipeline.preprocess_image(image)
    # Cache on disk so image_to_3d can re-open it by trial id later.
    result.save(os.path.join(TMP_DIR, f"{new_trial}.png"))
    return str(new_trial), result
40
+
41
+
42
def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
    """
    Serialize the generated gaussian and mesh into a CPU-only, pickleable dict
    suitable for storage in a gr.State.

    Args:
        gs (Gaussian): The generated gaussian representation.
        mesh (MeshExtractResult): The extracted mesh.
        trial_id (str): The uuid of the trial.

    Returns:
        dict: Packed state with numpy copies of all tensors.
    """
    gaussian_blob = dict(gs.init_params)
    # Move every learned tensor off the GPU and into numpy form.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        gaussian_blob[attr] = getattr(gs, attr).cpu().numpy()
    return {
        'gaussian': gaussian_blob,
        'mesh': {
            'vertices': mesh.vertices.cpu().numpy(),
            'faces': mesh.faces.cpu().numpy(),
        },
        'trial_id': trial_id,
    }
58
+
59
+
60
def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
    """
    Rebuild CUDA-resident gaussian and mesh objects from a packed state dict.

    Args:
        state (dict): The dict produced by pack_state.

    Returns:
        Gaussian: The reconstructed gaussian representation.
        edict: The reconstructed mesh (vertices, faces).
        str: The trial id stored in the state.
    """
    packed = state['gaussian']
    gs = Gaussian(
        aabb=packed['aabb'],
        sh_degree=packed['sh_degree'],
        mininum_kernel_size=packed['mininum_kernel_size'],
        scaling_bias=packed['scaling_bias'],
        opacity_bias=packed['opacity_bias'],
        scaling_activation=packed['scaling_activation'],
    )
    # Restore every learned tensor back onto the GPU.
    for attr in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, attr, torch.tensor(packed[attr], device='cuda'))

    mesh = edict(
        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
    )

    return gs, mesh, state['trial_id']
81
+
82
+
83
@spaces.GPU
def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
    """
    Convert an image to a 3D model.

    Args:
        trial_id (str): The uuid of the trial (preprocessed image is read from TMP_DIR).
        seed (int): The random seed.
        randomize_seed (bool): Whether to randomize the seed.
        ss_guidance_strength (float): The guidance strength for sparse structure generation.
        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
        slat_guidance_strength (float): The guidance strength for structured latent generation.
        slat_sampling_steps (int): The number of sampling steps for structured latent generation.

    Returns:
        dict: The information of the generated 3D model (see pack_state).
        str: The path to the preview video of the 3D model.
    """
    if randomize_seed:
        seed = np.random.randint(0, MAX_SEED)
    outputs = pipeline.run(
        Image.open(f"{TMP_DIR}/{trial_id}.png"),
        seed=seed,
        formats=["gaussian", "mesh"],
        preprocess_image=False,  # already preprocessed by preprocess_image()
        sparse_structure_sampler_params={
            "steps": ss_sampling_steps,
            "cfg_strength": ss_guidance_strength,
        },
        slat_sampler_params={
            "steps": slat_sampling_steps,
            "cfg_strength": slat_guidance_strength,
        },
    )
    # Render color (gaussian) and normal (mesh) turntables and place them side by side.
    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
    # Bug fix: convert the fresh uuid to str. The original stored a uuid.UUID
    # object in the packed state, breaking pack_state's `trial_id: str` contract
    # (string ids are used everywhere else, e.g. preprocess_image).
    trial_id = str(uuid.uuid4())
    video_path = f"{TMP_DIR}/{trial_id}.mp4"
    os.makedirs(os.path.dirname(video_path), exist_ok=True)
    imageio.mimsave(video_path, video, fps=15)
    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
    return state, video_path
126
+
127
+
128
@spaces.GPU
def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
    """
    Extract a GLB file from the 3D model.

    Args:
        state (dict): The state of the generated 3D model.
        mesh_simplify (float): The mesh simplification factor.
        texture_size (int): The texture resolution.

    Returns:
        str: The path to the extracted GLB file (shown in the 3D viewer).
        str: The same path, wired to the download button.
    """
    gaussian_repr, mesh_repr, trial_id = unpack_state(state)
    export_path = os.path.join(TMP_DIR, f"{trial_id}.glb")
    asset = postprocessing_utils.to_glb(
        gaussian_repr,
        mesh_repr,
        simplify=mesh_simplify,
        texture_size=texture_size,
        verbose=False,
    )
    asset.export(export_path)
    return export_path, export_path
146
+
147
+
148
def activate_button() -> gr.Button:
    """Gradio update helper: make the target button clickable."""
    enabled = gr.Button(interactive=True)
    return enabled
150
+
151
+
152
def deactivate_button() -> gr.Button:
    """Gradio update helper: grey out the target button."""
    disabled = gr.Button(interactive=False)
    return disabled
154
+
155
+
156
# UI layout and event wiring. Fix: corrected the grammar of the user-facing
# instructions ("If the image has alpha channel, it be used as the mask").
with gr.Blocks() as demo:
    gr.Markdown("""
    ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
    * Upload an image and click "Generate" to create a 3D asset. If the image has an alpha channel, it will be used as the mask. Otherwise, we use `rembg` to remove the background.
    * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
    """)

    with gr.Row():
        with gr.Column():
            image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)

            with gr.Accordion(label="Generation Settings", open=False):
                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                gr.Markdown("Stage 1: Sparse Structure Generation")
                with gr.Row():
                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                gr.Markdown("Stage 2: Structured Latent Generation")
                with gr.Row():
                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)

            generate_btn = gr.Button("Generate")

            with gr.Accordion(label="GLB Extraction Settings", open=False):
                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)

            # Disabled until a model has been generated.
            extract_glb_btn = gr.Button("Extract GLB", interactive=False)

        with gr.Column():
            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
            model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
            download_glb = gr.DownloadButton(label="Download GLB", interactive=False)

    # Hidden state: trial id of the current upload, and the packed model state.
    trial_id = gr.Textbox(visible=False)
    output_buf = gr.State()

    # Example images at the bottom of the page
    with gr.Row():
        examples = gr.Examples(
            examples=[
                f'assets/example_image/{image}'
                for image in os.listdir("assets/example_image")
            ],
            inputs=[image_prompt],
            fn=preprocess_image,
            outputs=[trial_id, image_prompt],
            run_on_click=True,
            examples_per_page=64,
        )

    # Handlers
    image_prompt.upload(
        preprocess_image,
        inputs=[image_prompt],
        outputs=[trial_id, image_prompt],
    )
    image_prompt.clear(
        lambda: '',
        outputs=[trial_id],
    )

    generate_btn.click(
        image_to_3d,
        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
        outputs=[output_buf, video_output],
    ).then(
        activate_button,
        outputs=[extract_glb_btn],
    )

    video_output.clear(
        deactivate_button,
        outputs=[extract_glb_btn],
    )

    extract_glb_btn.click(
        extract_glb,
        inputs=[output_buf, mesh_simplify, texture_size],
        outputs=[model_output, download_glb],
    ).then(
        activate_button,
        outputs=[download_glb],
    )

    model_output.clear(
        deactivate_button,
        outputs=[download_glb],
    )
247
+
248
+
249
+ # Launch the Gradio app
250
if __name__ == "__main__":
    # `pipeline` is a module-level global read by preprocess_image / image_to_3d.
    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
    pipeline.cuda()
    try:
        # Warm up rembg so the first real upload doesn't pay the load cost.
        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. The preload is best-effort, so only catch Exception.
        pass
    demo.launch()
assets/example_image/T.png ADDED
assets/example_image/typical_building_building.png ADDED
assets/example_image/typical_building_castle.png ADDED
assets/example_image/typical_building_colorful_cottage.png ADDED
assets/example_image/typical_building_maya_pyramid.png ADDED
assets/example_image/typical_building_mushroom.png ADDED
assets/example_image/typical_building_space_station.png ADDED
assets/example_image/typical_creature_dragon.png ADDED
assets/example_image/typical_creature_elephant.png ADDED
assets/example_image/typical_creature_furry.png ADDED
assets/example_image/typical_creature_quadruped.png ADDED
assets/example_image/typical_creature_robot_crab.png ADDED
assets/example_image/typical_creature_robot_dinosour.png ADDED
assets/example_image/typical_creature_rock_monster.png ADDED
assets/example_image/typical_humanoid_block_robot.png ADDED
assets/example_image/typical_humanoid_dragonborn.png ADDED
assets/example_image/typical_humanoid_dwarf.png ADDED
assets/example_image/typical_humanoid_goblin.png ADDED
assets/example_image/typical_humanoid_mech.png ADDED
assets/example_image/typical_misc_crate.png ADDED
assets/example_image/typical_misc_fireplace.png ADDED
assets/example_image/typical_misc_gate.png ADDED
assets/example_image/typical_misc_lantern.png ADDED
assets/example_image/typical_misc_magicbook.png ADDED
assets/example_image/typical_misc_mailbox.png ADDED
assets/example_image/typical_misc_monster_chest.png ADDED
assets/example_image/typical_misc_paper_machine.png ADDED
assets/example_image/typical_misc_phonograph.png ADDED
assets/example_image/typical_misc_portal2.png ADDED
assets/example_image/typical_misc_storage_chest.png ADDED
assets/example_image/typical_misc_telephone.png ADDED
assets/example_image/typical_misc_television.png ADDED
assets/example_image/typical_misc_workbench.png ADDED
assets/example_image/typical_vehicle_biplane.png ADDED
assets/example_image/typical_vehicle_bulldozer.png ADDED
assets/example_image/typical_vehicle_cart.png ADDED
assets/example_image/typical_vehicle_excavator.png ADDED
assets/example_image/typical_vehicle_helicopter.png ADDED
assets/example_image/typical_vehicle_locomotive.png ADDED
assets/example_image/typical_vehicle_pirate_ship.png ADDED
assets/example_image/weatherworn_misc_paper_machine3.png ADDED
extensions/nvdiffrast/LICENSE.txt ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
2
+
3
+
4
+ Nvidia Source Code License (1-Way Commercial)
5
+
6
+ =======================================================================
7
+
8
+ 1. Definitions
9
+
10
+ "Licensor" means any person or entity that distributes its Work.
11
+
12
+ "Software" means the original work of authorship made available under
13
+ this License.
14
+
15
+ "Work" means the Software and any additions to or derivative works of
16
+ the Software that are made available under this License.
17
+
18
+ The terms "reproduce," "reproduction," "derivative works," and
19
+ "distribution" have the meaning as provided under U.S. copyright law;
20
+ provided, however, that for the purposes of this License, derivative
21
+ works shall not include works that remain separable from, or merely
22
+ link (or bind by name) to the interfaces of, the Work.
23
+
24
+ Works, including the Software, are "made available" under this License
25
+ by including in or with the Work either (a) a copyright notice
26
+ referencing the applicability of this License to the Work, or (b) a
27
+ copy of this License.
28
+
29
+ 2. License Grants
30
+
31
+ 2.1 Copyright Grant. Subject to the terms and conditions of this
32
+ License, each Licensor grants to you a perpetual, worldwide,
33
+ non-exclusive, royalty-free, copyright license to reproduce,
34
+ prepare derivative works of, publicly display, publicly perform,
35
+ sublicense and distribute its Work and any resulting derivative
36
+ works in any form.
37
+
38
+ 3. Limitations
39
+
40
+ 3.1 Redistribution. You may reproduce or distribute the Work only
41
+ if (a) you do so under this License, (b) you include a complete
42
+ copy of this License with your distribution, and (c) you retain
43
+ without modification any copyright, patent, trademark, or
44
+ attribution notices that are present in the Work.
45
+
46
+ 3.2 Derivative Works. You may specify that additional or different
47
+ terms apply to the use, reproduction, and distribution of your
48
+ derivative works of the Work ("Your Terms") only if (a) Your Terms
49
+ provide that the use limitation in Section 3.3 applies to your
50
+ derivative works, and (b) you identify the specific derivative
51
+ works that are subject to Your Terms. Notwithstanding Your Terms,
52
+ this License (including the redistribution requirements in Section
53
+ 3.1) will continue to apply to the Work itself.
54
+
55
+ 3.3 Use Limitation. The Work and any derivative works thereof only
56
+ may be used or intended for use non-commercially. The Work or
57
+ derivative works thereof may be used or intended for use by Nvidia
58
+ or its affiliates commercially or non-commercially. As used herein,
59
+ "non-commercially" means for research or evaluation purposes only
60
+ and not for any direct or indirect monetary gain.
61
+
62
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim
63
+ against any Licensor (including any claim, cross-claim or
64
+ counterclaim in a lawsuit) to enforce any patents that you allege
65
+ are infringed by any Work, then your rights under this License from
66
+ such Licensor (including the grant in Section 2.1) will terminate
67
+ immediately.
68
+
69
+ 3.5 Trademarks. This License does not grant any rights to use any
70
+ Licensor's or its affiliates' names, logos, or trademarks, except
71
+ as necessary to reproduce the notices described in this License.
72
+
73
+ 3.6 Termination. If you violate any term of this License, then your
74
+ rights under this License (including the grant in Section 2.1) will
75
+ terminate immediately.
76
+
77
+ 4. Disclaimer of Warranty.
78
+
79
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
80
+ KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
81
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
82
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
83
+ THIS LICENSE.
84
+
85
+ 5. Limitation of Liability.
86
+
87
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
88
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
89
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
90
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
91
+ OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
92
+ (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
93
+ LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
94
+ COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
95
+ THE POSSIBILITY OF SUCH DAMAGES.
96
+
97
+ =======================================================================
extensions/nvdiffrast/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Nvdiffrast – Modular Primitives for High-Performance Differentiable Rendering
2
+
3
+ ![Teaser image](./docs/img/teaser.png)
4
+
5
+ **Modular Primitives for High-Performance Differentiable Rendering**<br>
6
+ Samuli Laine, Janne Hellsten, Tero Karras, Yeongho Seol, Jaakko Lehtinen, Timo Aila<br>
7
+ [http://arxiv.org/abs/2011.03277](http://arxiv.org/abs/2011.03277)
8
+
9
+ Nvdiffrast is a PyTorch/TensorFlow library that provides high-performance primitive operations for rasterization-based differentiable rendering.
10
+ Please refer to &#x261E;&#x261E; [nvdiffrast documentation](https://nvlabs.github.io/nvdiffrast) &#x261C;&#x261C; for more information.
11
+
12
+ ## Licenses
13
+
14
+ Copyright &copy; 2020&ndash;2024, NVIDIA Corporation. All rights reserved.
15
+
16
+ This work is made available under the [Nvidia Source Code License](https://github.com/NVlabs/nvdiffrast/blob/main/LICENSE.txt).
17
+
18
+ For business inquiries, please visit our website and submit the form: [NVIDIA Research Licensing](https://www.nvidia.com/en-us/research/inquiries/)
19
+
20
+ We do not currently accept outside code contributions in the form of pull requests.
21
+
22
+ Environment map stored as part of `samples/data/envphong.npz` is derived from a Wave Engine
23
+ [sample material](https://github.com/WaveEngine/Samples-2.5/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap)
24
+ originally shared under
25
+ [MIT License](https://github.com/WaveEngine/Samples-2.5/blob/master/LICENSE.md).
26
+ Mesh and texture stored as part of `samples/data/earth.npz` are derived from
27
+ [3D Earth Photorealistic 2K](https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125)
28
+ model originally made available under
29
+ [TurboSquid 3D Model License](https://blog.turbosquid.com/turbosquid-3d-model-license/#3d-model-license).
30
+
31
+ ## Citation
32
+
33
+ ```
34
+ @article{Laine2020diffrast,
35
+ title = {Modular Primitives for High-Performance Differentiable Rendering},
36
+ author = {Samuli Laine and Janne Hellsten and Tero Karras and Yeongho Seol and Jaakko Lehtinen and Timo Aila},
37
+ journal = {ACM Transactions on Graphics},
38
+ year = {2020},
39
+ volume = {39},
40
+ number = {6}
41
+ }
42
+ ```
extensions/nvdiffrast/nvdiffrast/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Package version; matches the prebuilt wheel referenced in .gitattributes
# (nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl).
__version__ = '0.3.3'
extensions/nvdiffrast/nvdiffrast/common/antialias.cu ADDED
@@ -0,0 +1,558 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2
+ //
3
+ // NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ // and proprietary rights in and to this software, related documentation
5
+ // and any modifications thereto. Any use, reproduction, disclosure or
6
+ // distribution of this software and related documentation without an express
7
+ // license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ #include "antialias.h"
10
+
11
+ //------------------------------------------------------------------------
12
+ // Helpers.
13
+
14
#define F32_MAX (3.402823466e+38f)

// True when a and b have the same sign bit (XOR of the raw float bits is non-negative).
static __forceinline__ __device__ bool same_sign(float a, float b) { return (__float_as_int(a) ^ __float_as_int(b)) >= 0; }

// Division-free comparison of rationals: n0/d0 > n1/d1, with the cross-multiply
// result sign-corrected by whether d0 and d1 share a sign.
static __forceinline__ __device__ bool rational_gt(float n0, float n1, float d0, float d1) { return (n0*d1 > n1*d0) == same_sign(d0, d1); }

// Index (0, 1 or 2) of the largest of the three rationals n_i/d_i.
static __forceinline__ __device__ int max_idx3(float n0, float n1, float n2, float d0, float d1, float d2)
{
    bool g10 = rational_gt(n1, n0, d1, d0);
    bool g20 = rational_gt(n2, n0, d2, d0);
    bool g21 = rational_gt(n2, n1, d2, d1);
    if (g20 && g21) return 2;  // n2/d2 beats both others.
    if (g10) return 1;         // n1/d1 beats n0/d0 (and n2/d2 didn't win).
    return 0;
}
26
+
27
+ //------------------------------------------------------------------------
28
+ // Format of antialiasing work items stored in work buffer. Usually accessed directly as int4.
29
+
30
// One antialiasing work item (16 bytes; per the section comment above these are
// usually reinterpreted as int4, so the field layout must not change).
struct AAWorkItem
{
    enum
    {
        EDGE_MASK       = 3,    // Edge index in lowest bits.
        FLAG_DOWN_BIT   = 2,    // Down instead of right.
        FLAG_TRI1_BIT   = 3,    // Edge is from other pixel's triangle.
    };

    int             px, py;     // Pixel x, y.
    unsigned int    pz_flags;   // High 16 bits = pixel z, low 16 bits = edge index and flags.
    float           alpha;      // Antialiasing alpha value. Zero if no AA.
};
43
+
44
+ //------------------------------------------------------------------------
45
+ // Hash functions. Adapted from public-domain code at http://www.burtleburtle.net/bob/hash/doobs.html
46
+
47
// Golden-ratio constant used to seed the mixer.
#define JENKINS_MAGIC (0x9e3779b9u)

// Bob Jenkins' 96-bit reversible mix: diffuses the bits of a, b and c into
// one another (see the hash-functions section comment above for the origin).
static __device__ __forceinline__ void jenkins_mix(unsigned int& a, unsigned int& b, unsigned int& c)
{
    a -= b; a -= c; a ^= (c>>13);
    b -= c; b -= a; b ^= (a<<8);
    c -= a; c -= b; c ^= (b>>13);
    a -= b; a -= c; a ^= (c>>12);
    b -= c; b -= a; b ^= (a<<16);
    c -= a; c -= b; c ^= (b>>5);
    a -= b; a -= c; a ^= (c>>3);
    b -= c; b -= a; b ^= (a<<10);
    c -= a; c -= b; c ^= (b>>15);
}
60
+
61
+ // Helper class for hash index iteration. Implements simple odd-skip linear probing with a key-dependent skip.
62
// Helper class for hash index iteration (see comment above): odd-skip linear
// probing where both the start index and the skip are derived from the key.
class HashIndex
{
public:
    __device__ __forceinline__ HashIndex(const AntialiasKernelParams& p, uint64_t key)
    {
        // Probe mask; presumably the table size (allocTriangles << log-elements)
        // is a power of two for the &-modulo to be valid — TODO confirm at alloc site.
        m_mask = (p.allocTriangles << AA_LOG_HASH_ELEMENTS_PER_TRIANGLE(p.allocTriangles)) - 1; // This should work until triangle count exceeds 1073741824.
        m_idx = (uint32_t)(key & 0xffffffffu);
        m_skip = (uint32_t)(key >> 32);
        uint32_t dummy = JENKINS_MAGIC;
        jenkins_mix(m_idx, m_skip, dummy);
        m_idx &= m_mask;
        m_skip &= m_mask;
        m_skip |= 1;  // Odd skip so consecutive probes eventually visit every slot of a pow2 table.
    }
    __device__ __forceinline__ int get(void) const { return m_idx; }
    __device__ __forceinline__ void next(void) { m_idx = (m_idx + m_skip) & m_mask; }
private:
    uint32_t m_idx, m_skip, m_mask;
};
81
+
82
// Insert value v under `key`. Each table entry holds the 64-bit key in its
// first two words and up to two distinct values in the last two; a third
// distinct value for the same key is silently dropped (no retry after the
// second CAS fails).
static __device__ __forceinline__ void hash_insert(const AntialiasKernelParams& p, uint64_t key, int v)
{
    HashIndex idx(p, key);
    while(1)
    {
        // Claim an empty slot (prev == 0) or stop at the slot already holding this key.
        uint64_t prev = atomicCAS((unsigned long long*)&p.evHash[idx.get()], 0, (unsigned long long)key);
        if (prev == 0 || prev == key)
            break;
        idx.next();
    }
    // Fill the first free value word (entry.z first, then entry.w).
    int* q = (int*)&p.evHash[idx.get()];
    int a = atomicCAS(q+2, 0, v);
    if (a != 0 && a != v)
        atomicCAS(q+3, 0, v);
}
97
+
98
// Look up `key` and return its two stored values (entry.z, entry.w).
// An empty slot terminates the probe, so a missing key returns the zero
// entry's values.
static __device__ __forceinline__ int2 hash_find(const AntialiasKernelParams& p, uint64_t key)
{
    HashIndex idx(p, key);
    while(1)
    {
        uint4 entry = p.evHash[idx.get()];
        // Reassemble the 64-bit key from the entry's first two 32-bit words.
        uint64_t k = ((uint64_t)entry.x) | (((uint64_t)entry.y) << 32);
        if (k == key || k == 0)
            return make_int2((int)entry.z, (int)entry.w);
        idx.next();
    }
}
110
+
111
// Record that vertex vn lies opposite the edge (va, vb). Degenerate edges
// (va == vb) are ignored.
static __device__ __forceinline__ void evhash_insert_vertex(const AntialiasKernelParams& p, int va, int vb, int vn)
{
    if (va == vb)
        return;

    uint64_t v0 = (uint32_t)min(va, vb) + 1; // canonical vertex order
    uint64_t v1 = (uint32_t)max(va, vb) + 1;
    uint64_t vk = v0 | (v1 << 32); // hash key
    hash_insert(p, vk, vn + 1); // +1 bias so 0 can mean "empty" in the table
}
121
+
122
// Given edge (va, vb) and one known opposing vertex vr, return the other
// opposing vertex, or -1 if the edge has no other triangle.
static __forceinline__ __device__ int evhash_find_vertex(const AntialiasKernelParams& p, int va, int vb, int vr)
{
    if (va == vb)
        return -1;

    uint64_t v0 = (uint32_t)min(va, vb) + 1; // canonical vertex order
    uint64_t v1 = (uint32_t)max(va, vb) + 1;
    uint64_t vk = v0 | (v1 << 32); // hash key
    int2 vn = hash_find(p, vk) - 1; // undo the +1 bias applied at insert time
    if (vn.x == vr) return vn.y;
    if (vn.y == vr) return vn.x;
    return -1;
}
135
+
136
+ //------------------------------------------------------------------------
137
+ // Mesh analysis kernel.
138
+
139
// One thread per triangle: validate its vertex indices and register each of
// its three edges, paired with the opposing vertex, in the edge-vertex hash.
__global__ void AntialiasFwdMeshKernel(const AntialiasKernelParams p)
{
    int idx = threadIdx.x + blockIdx.x * blockDim.x;
    if (idx >= p.numTriangles)
        return;

    int v0 = p.tri[idx * 3 + 0];
    int v1 = p.tri[idx * 3 + 1];
    int v2 = p.tri[idx * 3 + 2];

    // Skip triangles with out-of-range vertex indices.
    if (v0 < 0 || v0 >= p.numVertices ||
        v1 < 0 || v1 >= p.numVertices ||
        v2 < 0 || v2 >= p.numVertices)
        return;

    // Skip degenerate triangles (repeated vertices).
    if (v0 == v1 || v1 == v2 || v2 == v0)
        return;

    // Each edge maps to the vertex opposite it.
    evhash_insert_vertex(p, v1, v2, v0);
    evhash_insert_vertex(p, v2, v0, v1);
    evhash_insert_vertex(p, v0, v1, v2);
}
161
+
162
+ //------------------------------------------------------------------------
163
+ // Discontinuity finder kernel.
164
+
165
// One thread per pixel: compare the rasterized triangle id against the right
// and down neighbors and append a work item to p.workBuffer for every
// discontinuity found. Slot 0 of the work buffer holds counters, not items.
__global__ void AntialiasFwdDiscontinuityKernel(const AntialiasKernelParams p)
{
    // Calculate pixel position.
    int px = blockIdx.x * AA_DISCONTINUITY_KERNEL_BLOCK_WIDTH + threadIdx.x;
    int py = blockIdx.y * AA_DISCONTINUITY_KERNEL_BLOCK_HEIGHT + threadIdx.y;
    int pz = blockIdx.z;
    if (px >= p.width || py >= p.height || pz >= p.n)
        return;

    // Pointer to our TriIdx and fetch. (<< 2) + 3 selects the 4th channel of the
    // 4-float rasterizer output for this pixel.
    int pidx0 = ((px + p.width * (py + p.height * pz)) << 2) + 3;
    float tri0 = p.rasterOut[pidx0]; // These can stay as float, as we only compare them against each other.

    // Look right, clamp at edge.
    int pidx1 = pidx0;
    if (px < p.width - 1)
        pidx1 += 4;
    float tri1 = p.rasterOut[pidx1];

    // Look down, clamp at edge.
    int pidx2 = pidx0;
    if (py < p.height - 1)
        pidx2 += p.width << 2;
    float tri2 = p.rasterOut[pidx2];

    // Determine amount of work: 0, 1 or 2 items for this pixel.
    int count = 0;
    if (tri1 != tri0) count = 1;
    if (tri2 != tri0) count += 1;
    if (!count)
        return; // Exit warp.

    // Coalesce work counter update to once per CTA: threads first reserve a
    // local offset in shared memory, then one thread reserves the block's range
    // in the global counter (workBuffer[0].x).
    __shared__ int s_temp;
    s_temp = 0;
    __syncthreads();
    int idx = atomicAdd(&s_temp, count);
    __syncthreads();
    if (idx == 0)
    {
        int base = atomicAdd(&p.workBuffer[0].x, s_temp);
        s_temp = base + 1; // don't clobber the counters in first slot.
    }
    __syncthreads();
    idx += s_temp;

    // Write to memory. FLAG_DOWN_BIT distinguishes the down edge from the right edge.
    if (tri1 != tri0) p.workBuffer[idx++] = make_int4(px, py, (pz << 16), 0);
    if (tri2 != tri0) p.workBuffer[idx] = make_int4(px, py, (pz << 16) + (1 << AAWorkItem::FLAG_DOWN_BIT), 0);
}
215
+
216
//------------------------------------------------------------------------
// Forward analysis kernel.
//
// Persistent-thread kernel: thread blocks keep fetching batches of work
// items (emitted by the discontinuity kernel; counters live in
// workBuffer[0]) until all workBuffer[0].x items are processed. For each
// pixel pair straddling a potential discontinuity it selects one of the
// two triangles (foreground preferred, depth-compared when both are
// present), tests whether a silhouette edge of that triangle crosses
// between the two pixel centers, and if so blends color across the edge
// into p.output and rewrites the work item's flags and alpha for the
// gradient kernel.

__global__ void AntialiasFwdAnalysisKernel(const AntialiasKernelParams p)
{
    __shared__ int s_base;             // Base work-item index shared by the whole CTA.
    int workCount = p.workBuffer[0].x; // Total number of work items produced.
    for(;;)
    {
        // Persistent threads work fetcher.
        __syncthreads();
        if (threadIdx.x == 0)
            s_base = atomicAdd(&p.workBuffer[0].y, AA_ANALYSIS_KERNEL_THREADS_PER_BLOCK);
        __syncthreads();
        int thread_idx = s_base + threadIdx.x;
        if (thread_idx >= workCount)
            return;

        // Fetch work item; slot 0 is reserved for the counters.
        int4* pItem = p.workBuffer + thread_idx + 1;
        int4 item = *pItem;
        int px = item.x;
        int py = item.y;
        int pz = (int)(((unsigned int)item.z) >> 16);       // Minibatch index in high bits.
        int d = (item.z >> AAWorkItem::FLAG_DOWN_BIT) & 1;  // 1 = down neighbor, 0 = right neighbor.

        // The two pixels straddling the potential discontinuity.
        int pixel0 = px + p.width * (py + p.height * pz);
        int pixel1 = pixel0 + (d ? p.width : 1);
        // Read (z, triangle id) as the second float2 of each 4-float rasterOut pixel.
        float2 zt0 = ((float2*)p.rasterOut)[(pixel0 << 1) + 1];
        float2 zt1 = ((float2*)p.rasterOut)[(pixel1 << 1) + 1];
        int tri0 = float_to_triidx(zt0.y) - 1; // Negative = background.
        int tri1 = float_to_triidx(zt1.y) - 1;

        // Select triangle based on background / depth.
        int tri = (tri0 >= 0) ? tri0 : tri1;
        if (tri0 >= 0 && tri1 >= 0)
            tri = (zt0.x < zt1.x) ? tri0 : tri1;
        if (tri == tri1)
        {
            // Calculate with respect to neighbor pixel if chose that triangle.
            px += 1 - d;
            py += d;
        }

        // Bail out if triangle index is corrupt.
        if (tri < 0 || tri >= p.numTriangles)
            continue;

        // Fetch vertex indices.
        int vi0 = p.tri[tri * 3 + 0];
        int vi1 = p.tri[tri * 3 + 1];
        int vi2 = p.tri[tri * 3 + 2];

        // Bail out if vertex indices are corrupt.
        if (vi0 < 0 || vi0 >= p.numVertices ||
            vi1 < 0 || vi1 >= p.numVertices ||
            vi2 < 0 || vi2 >= p.numVertices)
            continue;

        // Fetch opposite vertex indices. Use vertex itself (always silhouette) if no opposite vertex exists.
        int op0 = evhash_find_vertex(p, vi2, vi1, vi0);
        int op1 = evhash_find_vertex(p, vi0, vi2, vi1);
        int op2 = evhash_find_vertex(p, vi1, vi0, vi2);

        // Instance mode: Adjust vertex indices based on minibatch index.
        if (p.instance_mode)
        {
            int vbase = pz * p.numVertices;
            vi0 += vbase;
            vi1 += vbase;
            vi2 += vbase;
            if (op0 >= 0) op0 += vbase;
            if (op1 >= 0) op1 += vbase;
            if (op2 >= 0) op2 += vbase;
        }

        // Fetch vertex positions (clip-space float4s).
        float4 p0 = ((float4*)p.pos)[vi0];
        float4 p1 = ((float4*)p.pos)[vi1];
        float4 p2 = ((float4*)p.pos)[vi2];
        float4 o0 = (op0 < 0) ? p0 : ((float4*)p.pos)[op0];
        float4 o1 = (op1 < 0) ? p1 : ((float4*)p.pos)[op1];
        float4 o2 = (op2 < 0) ? p2 : ((float4*)p.pos)[op2];

        // Project vertices to pixel space, relative to this pixel's center.
        float w0 = 1.f / p0.w;
        float w1 = 1.f / p1.w;
        float w2 = 1.f / p2.w;
        float ow0 = 1.f / o0.w;
        float ow1 = 1.f / o1.w;
        float ow2 = 1.f / o2.w;
        float fx = (float)px + .5f - p.xh;
        float fy = (float)py + .5f - p.yh;
        float x0 = p0.x * w0 * p.xh - fx;
        float y0 = p0.y * w0 * p.yh - fy;
        float x1 = p1.x * w1 * p.xh - fx;
        float y1 = p1.y * w1 * p.yh - fy;
        float x2 = p2.x * w2 * p.xh - fx;
        float y2 = p2.y * w2 * p.yh - fy;
        float ox0 = o0.x * ow0 * p.xh - fx;
        float oy0 = o0.y * ow0 * p.yh - fy;
        float ox1 = o1.x * ow1 * p.xh - fx;
        float oy1 = o1.y * ow1 * p.yh - fy;
        float ox2 = o2.x * ow2 * p.xh - fx;
        float oy2 = o2.y * ow2 * p.yh - fy;

        // Signs to kill non-silhouette edges.
        float bb = (x1-x0)*(y2-y0) - (x2-x0)*(y1-y0); // Triangle itself.
        float a0 = (x1-ox0)*(y2-oy0) - (x2-ox0)*(y1-oy0); // Wings.
        float a1 = (x2-ox1)*(y0-oy1) - (x0-ox1)*(y2-oy1);
        float a2 = (x0-ox2)*(y1-oy2) - (x1-ox2)*(y0-oy2);

        // If no matching signs anywhere, skip the rest.
        if (same_sign(a0, bb) || same_sign(a1, bb) || same_sign(a2, bb))
        {
            // XY flip for horizontal edges.
            if (d)
            {
                swap(x0, y0);
                swap(x1, y1);
                swap(x2, y2);
            }

            // Edge direction vectors (edge i is opposite vertex i).
            float dx0 = x2 - x1;
            float dx1 = x0 - x2;
            float dx2 = x1 - x0;
            float dy0 = y2 - y1;
            float dy1 = y0 - y2;
            float dy2 = y1 - y0;

            // Check if an edge crosses between us and the neighbor pixel.
            float dc = -F32_MAX;
            float ds = (tri == tri0) ? 1.f : -1.f; // Sign flips when the neighbor's triangle was selected.
            float d0 = ds * (x1*dy0 - y1*dx0);
            float d1 = ds * (x2*dy1 - y2*dx1);
            float d2 = ds * (x0*dy2 - y0*dx2);

            // Disqualify edges whose endpoints lie on the same side of the pixel row.
            if (same_sign(y1, y2)) d0 = -F32_MAX, dy0 = 1.f;
            if (same_sign(y2, y0)) d1 = -F32_MAX, dy1 = 1.f;
            if (same_sign(y0, y1)) d2 = -F32_MAX, dy2 = 1.f;

            // Pick the dominant candidate edge and compute the crossing position.
            int di = max_idx3(d0, d1, d2, dy0, dy1, dy2);
            if (di == 0 && same_sign(a0, bb) && fabsf(dy0) >= fabsf(dx0)) dc = d0 / dy0;
            if (di == 1 && same_sign(a1, bb) && fabsf(dy1) >= fabsf(dx1)) dc = d1 / dy1;
            if (di == 2 && same_sign(a2, bb) && fabsf(dy2) >= fabsf(dx2)) dc = d2 / dy2;
            float eps = .0625f; // Expect no more than 1/16 pixel inaccuracy.

            // Adjust output image if a suitable edge was found.
            if (dc > -eps && dc < 1.f + eps)
            {
                dc = fminf(fmaxf(dc, 0.f), 1.f);
                float alpha = ds * (.5f - dc);
                const float* pColor0 = p.color + pixel0 * p.channels;
                const float* pColor1 = p.color + pixel1 * p.channels;
                // Blend into whichever pixel receives coverage from the other.
                float* pOutput = p.output + (alpha > 0.f ? pixel0 : pixel1) * p.channels;
                for (int i=0; i < p.channels; i++)
                    atomicAdd(&pOutput[i], alpha * (pColor1[i] - pColor0[i]));

                // Rewrite the work item's flags and alpha. Keep original px, py.
                unsigned int flags = pz << 16;
                flags |= di;                                                  // Edge index within the triangle.
                flags |= d << AAWorkItem::FLAG_DOWN_BIT;                      // Down vs. right neighbor.
                flags |= (__float_as_uint(ds) >> 31) << AAWorkItem::FLAG_TRI1_BIT; // Which pixel's triangle was used.
                ((int2*)pItem)[1] = make_int2(flags, __float_as_int(alpha));
            }
        }
    }
}
383
+
384
//------------------------------------------------------------------------
// Gradient kernel.
//
// Persistent-thread backward pass. Re-reads the work items the forward
// analysis kernel filled in (item.w holds the alpha bits; zero means the
// forward pass did not blend there), routes incoming gradients p.dy into
// color gradients (gradColor), and — by re-deriving the active edge's
// geometry — into position gradients (gradPos) for the edge's two
// vertices. The __ballot_sync masks track which warp lanes are still
// active past each early-out, so the coalesced-atomics helper (CA_*)
// only groups live lanes.

__global__ void AntialiasGradKernel(const AntialiasKernelParams p)
{
    // Temporary space for coalesced atomics.
    CA_DECLARE_TEMP(AA_GRAD_KERNEL_THREADS_PER_BLOCK);
    __shared__ int s_base; // Work counter communication across entire CTA.

    int workCount = p.workBuffer[0].x; // Total number of work items.

    for(;;)
    {
        // Persistent threads work fetcher.
        __syncthreads();
        if (threadIdx.x == 0)
            s_base = atomicAdd(&p.workBuffer[0].y, AA_GRAD_KERNEL_THREADS_PER_BLOCK);
        __syncthreads();
        int thread_idx = s_base + threadIdx.x;
        if (thread_idx >= workCount)
            return;

        // Read work item filled out by forward kernel.
        int4 item = p.workBuffer[thread_idx + 1];
        unsigned int amask = __ballot_sync(0xffffffffu, item.w); // Lanes with a live (blended) item.
        if (item.w == 0)
            continue; // No effect.

        // Unpack work item and replicate setup from forward analysis kernel.
        int px = item.x;
        int py = item.y;
        int pz = (int)(((unsigned int)item.z) >> 16);      // Minibatch index.
        int d = (item.z >> AAWorkItem::FLAG_DOWN_BIT) & 1; // 1 = down neighbor, 0 = right.
        float alpha = __int_as_float(item.w);              // Blend weight stored by forward pass.
        int tri1 = (item.z >> AAWorkItem::FLAG_TRI1_BIT) & 1; // Which pixel's triangle was chosen.
        int di = item.z & AAWorkItem::EDGE_MASK;           // Active edge index within the triangle.
        // Reconstruct ds = +/-1.f by injecting tri1 into the sign bit of 1.0f.
        float ds = __int_as_float(__float_as_int(1.0) | (tri1 << 31));
        int pixel0 = px + p.width * (py + p.height * pz);
        int pixel1 = pixel0 + (d ? p.width : 1);
        int tri = float_to_triidx(p.rasterOut[((tri1 ? pixel1 : pixel0) << 2) + 3]) - 1;
        if (tri1)
        {
            // Mirror the forward kernel: work relative to the neighbor pixel.
            px += 1 - d;
            py += d;
        }

        // Bail out if triangle index is corrupt.
        bool triFail = (tri < 0 || tri >= p.numTriangles);
        amask = __ballot_sync(amask, !triFail);
        if (triFail)
            continue;

        // Outgoing color gradients.
        float* pGrad0 = p.gradColor + pixel0 * p.channels;
        float* pGrad1 = p.gradColor + pixel1 * p.channels;

        // Incoming color gradients. Same pixel the forward pass wrote to.
        const float* pDy = p.dy + (alpha > 0.f ? pixel0 : pixel1) * p.channels;

        // Position gradient weight based on colors and incoming gradients.
        float dd = 0.f;
        const float* pColor0 = p.color + pixel0 * p.channels;
        const float* pColor1 = p.color + pixel1 * p.channels;

        // Loop over channels and accumulate.
        for (int i=0; i < p.channels; i++)
        {
            float dy = pDy[i];
            if (dy != 0.f)
            {
                // Update position gradient weight.
                dd += dy * (pColor1[i] - pColor0[i]);

                // Update color gradients. No coalescing because all have different targets.
                float v = alpha * dy;
                atomicAdd(&pGrad0[i], -v);
                atomicAdd(&pGrad1[i], v);
            }
        }

        // If position weight is zero, skip the rest.
        bool noGrad = (dd == 0.f);
        amask = __ballot_sync(amask, !noGrad);
        if (noGrad)
            continue;

        // Fetch vertex indices of the active edge and their positions.
        // Edge di runs between triangle corners i1 and i2 (di, di+1 mod 3 shifted).
        int i1 = (di < 2) ? (di + 1) : 0;
        int i2 = (i1 < 2) ? (i1 + 1) : 0;
        int vi1 = p.tri[3 * tri + i1];
        int vi2 = p.tri[3 * tri + i2];

        // Bail out if vertex indices are corrupt.
        bool vtxFail = (vi1 < 0 || vi1 >= p.numVertices || vi2 < 0 || vi2 >= p.numVertices);
        amask = __ballot_sync(amask, !vtxFail);
        if (vtxFail)
            continue;

        // Instance mode: Adjust vertex indices based on minibatch index.
        if (p.instance_mode)
        {
            vi1 += pz * p.numVertices;
            vi2 += pz * p.numVertices;
        }

        // Fetch vertex positions.
        float4 p1 = ((float4*)p.pos)[vi1];
        float4 p2 = ((float4*)p.pos)[vi2];

        // Project vertices to pixel space.
        float pxh = p.xh;
        float pyh = p.yh;
        float fx = (float)px + .5f - pxh;
        float fy = (float)py + .5f - pyh;

        // XY flip for horizontal edges.
        if (d)
        {
            swap(p1.x, p1.y);
            swap(p2.x, p2.y);
            swap(pxh, pyh);
            swap(fx, fy);
        }

        // Gradient calculation setup.
        float w1 = 1.f / p1.w;
        float w2 = 1.f / p2.w;
        float x1 = p1.x * w1 * pxh - fx;
        float y1 = p1.y * w1 * pyh - fy;
        float x2 = p2.x * w2 * pxh - fx;
        float y2 = p2.y * w2 * pyh - fy;
        float dx = x2 - x1;
        float dy = y2 - y1;
        float db = x1*dy - y1*dx;

        // Compute inverse delta-y with epsilon to avoid division by zero
        // for near-horizontal edges.
        float ep = copysignf(1e-3f, dy); // ~1/1000 pixel.
        float iy = 1.f / (dy + ep);

        // Compute position gradients.
        float dby = db * iy;
        float iw1 = -w1 * iy * dd;
        float iw2 = w2 * iy * dd;
        float gp1x = iw1 * pxh * y2;
        float gp2x = iw2 * pxh * y1;
        float gp1y = iw1 * pyh * (dby - x2);
        float gp2y = iw2 * pyh * (dby - x1);
        // w-gradients via chain rule through the perspective divide.
        float gp1w = -(p1.x * gp1x + p1.y * gp1y) * w1;
        float gp2w = -(p2.x * gp2x + p2.y * gp2y) * w2;

        // XY flip the gradients.
        if (d)
        {
            swap(gp1x, gp1y);
            swap(gp2x, gp2y);
        }

        // Kill position gradients if alpha was saturated.
        if (fabsf(alpha) >= 0.5f)
        {
            gp1x = gp1y = gp1w = 0.f;
            gp2x = gp2y = gp2w = 0.f;
        }

        // Initialize coalesced atomics. Match both triangle ID and edge index.
        // Also note that some threads may be inactive.
        CA_SET_GROUP_MASK(tri ^ (di << 30), amask);

        // Accumulate gradients (x, y, w components of each vertex).
        caAtomicAdd3_xyw(p.gradPos + 4 * vi1, gp1x, gp1y, gp1w);
        caAtomicAdd3_xyw(p.gradPos + 4 * vi2, gp2x, gp2y, gp2w);
    }
}
557
+
558
+ //------------------------------------------------------------------------
extensions/nvdiffrast/nvdiffrast/common/antialias.h ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2
+ //
3
+ // NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ // and proprietary rights in and to this software, related documentation
5
+ // and any modifications thereto. Any use, reproduction, disclosure or
6
+ // distribution of this software and related documentation without an express
7
+ // license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ #pragma once
10
+ #include "common.h"
11
+
12
//------------------------------------------------------------------------
// Constants and helpers.

// Thread-block dimensions for the per-pixel discontinuity kernel.
#define AA_DISCONTINUITY_KERNEL_BLOCK_WIDTH 32
#define AA_DISCONTINUITY_KERNEL_BLOCK_HEIGHT 8
// Thread counts for the persistent-thread analysis / mesh / gradient kernels.
#define AA_ANALYSIS_KERNEL_THREADS_PER_BLOCK 256
#define AA_MESH_KERNEL_THREADS_PER_BLOCK 256
// Hash-table slots allocated per triangle, as a function of the (power-of-two)
// triangle allocation. NOTE(review): the code tests (alloc) >= (2 << 25), i.e.
// 67108864, but the trailing comment speaks of alloc >= 33554432 (= 1 << 25);
// one of the two looks off by a factor of two — confirm against upstream
// nvdiffrast before relying on the threshold.
#define AA_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 4 : 8) // With more than 16777216 triangles (alloc >= 33554432) use smallest possible value of 4 to conserve memory, otherwise use 8 for fewer collisions.
#define AA_LOG_HASH_ELEMENTS_PER_TRIANGLE(alloc) ((alloc) >= (2 << 25) ? 2 : 3)
#define AA_GRAD_KERNEL_THREADS_PER_BLOCK 256
22
+
23
//------------------------------------------------------------------------
// CUDA kernel params.

// Parameter block passed by value to every antialiasing kernel; bundles all
// input/output buffers, mesh and image dimensions, and mode flags.
struct AntialiasKernelParams
{
    const float* color; // Incoming color buffer.
    const float* rasterOut; // Incoming rasterizer output buffer.
    const int* tri; // Incoming triangle buffer.
    const float* pos; // Incoming position buffer.
    float* output; // Output buffer of forward kernel.
    const float* dy; // Incoming gradients.
    float* gradColor; // Output buffer, color gradient.
    float* gradPos; // Output buffer, position gradient.
    int4* workBuffer; // Buffer for storing intermediate work items. First item reserved for counters.
    uint4* evHash; // Edge-vertex hash.
    int allocTriangles; // Number of triangles accommodated by evHash. Always power of two.
    int numTriangles; // Number of triangles.
    int numVertices; // Number of vertices.
    int width; // Input width.
    int height; // Input height.
    int n; // Minibatch size.
    int channels; // Channel count in color input.
    float xh, yh; // Transfer to pixel space.
    int instance_mode; // 0=normal, 1=instance mode.
    int tri_const; // 1 if triangle array is known to be constant.
};
49
+
50
+ //------------------------------------------------------------------------