Skip to content

Commit 7f29d4f

Browse files
author
Felix Igelbrink
committed
Merge branch 'master' of github.com:mortacious/python-optix into feature/nn
2 parents e4efdc3 + 830f3ef commit 7f29d4f

File tree

11 files changed

+161
-35
lines changed

11 files changed

+161
-35
lines changed

examples/compile_with_tasks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,4 @@
7272
tic = time.time()
7373
for i in range(args.num_iters):
7474
module = ox.Module(ctx, ptx, module_compile_options=compile_opts, pipeline_compile_options=pipeline_options)
75-
print("Overall run time without tasks", time.time()-tic)
75+
print("Overall run time without tasks", time.time()-tic)

examples/cuda/helpers.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ __forceinline__ __device__ float3 toSRGB( const float3& c )
4242
c.z < 0.0031308f ? 12.92f * c.z : 1.055f * powed.z - 0.055f );
4343
}
4444

45-
//__forceinline__ __device__ float dequantizeUnsigned8Bits( const unsigned char i )
46-
//{
47-
// enum { N = (1 << 8) - 1 };
48-
// return min((float)i / (float)N), 1.f)
49-
//}
5045
__forceinline__ __device__ unsigned char quantizeUnsigned8Bits( float x )
5146
{
5247
x = clamp( x, 0.0f, 1.0f );
@@ -60,6 +55,7 @@ __forceinline__ __device__ uchar4 make_color( const float3& c )
6055
float3 srgb = toSRGB( clamp( c, 0.0f, 1.0f ) );
6156
return make_uchar4( quantizeUnsigned8Bits( srgb.x ), quantizeUnsigned8Bits( srgb.y ), quantizeUnsigned8Bits( srgb.z ), 255u );
6257
}
58+
6359
__forceinline__ __device__ uchar4 make_color( const float4& c )
6460
{
6561
return make_color( make_float3( c.x, c.y, c.z ) );

examples/dynamic_geometry.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@
1818

1919
DEBUG=False
2020

21+
if DEBUG:
22+
exception_flags=ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW,
23+
debug_level = ox.CompileDebugLevel.FULL
24+
opt_level = ox.CompileOptimizationLevel.LEVEL_0
25+
else:
26+
exception_flags=ox.ExceptionFlags.NONE
27+
debug_level = ox.CompileDebugLevel.MINIMAL
28+
opt_level = ox.CompileOptimizationLevel.LEVEL_3
29+
30+
2131
#------------------------------------------------------------------------------
2232
# Local types
2333
#------------------------------------------------------------------------------
@@ -336,15 +346,9 @@ def build_mesh_accel(state):
336346

337347

338348
def create_module(state):
339-
if DEBUG:
340-
exception_flags=ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW,
341-
else:
342-
exception_flags=ox.ExceptionFlags.NONE
343-
344-
print("Triangle value", ox.PrimitiveTypeFlags.TRIANGLE.value)
345349
pipeline_opts = ox.PipelineCompileOptions(
346350
uses_motion_blur=False,
347-
uses_primitive_type_flags =ox.PrimitiveTypeFlags.TRIANGLE,
351+
uses_primitive_type_flags=ox.PrimitiveTypeFlags.TRIANGLE,
348352
traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING,
349353
exception_flags=exception_flags,
350354
num_payload_values=3,
@@ -353,8 +357,7 @@ def create_module(state):
353357

354358
compile_opts = ox.ModuleCompileOptions(
355359
max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT,
356-
opt_level=ox.CompileOptimizationLevel.DEFAULT,
357-
debug_level=ox.CompileDebugLevel.MODERATE)
360+
opt_level=opt_level, debug_level=debug_level)
358361

359362
cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry.cu')
360363
state.module = ox.Module(state.ctx, cuda_source, compile_opts, pipeline_opts)
@@ -371,7 +374,7 @@ def create_pipeline(state):
371374
program_grps = [state.raygen_grp, state.miss_grp, state.hit_grp]
372375

373376
link_opts = ox.PipelineLinkOptions(max_trace_depth=1,
374-
debug_level=ox.CompileDebugLevel.MODERATE)
377+
debug_level=debug_level)
375378

376379
pipeline = ox.Pipeline(state.ctx,
377380
compile_options=state.pipeline_opts,
@@ -414,7 +417,7 @@ def create_sbt(state):
414417
animation_time = 1.0
415418

416419
buffer_format = BufferImageFormat.UCHAR4
417-
output_buffer_type = CudaOutputBufferType.CUDA_DEVICE
420+
output_buffer_type = CudaOutputBufferType.enable_gl_interop()
418421

419422
init_camera_state(state)
420423
create_context(state)

examples/hello.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1+
import os, sys, logging
12
import optix as ox
23
import cupy as cp
34
import numpy as np
45
from PIL import Image, ImageOps
5-
import logging
6-
import sys
6+
77
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
88
log = logging.getLogger()
99

10+
script_dir = os.path.dirname(__file__)
11+
cuda_src = os.path.join(script_dir, "cuda", "hello.cu")
12+
1013
def create_module(ctx, pipeline_opts):
1114
compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0)
12-
module = ox.Module(ctx, 'cuda/hello.cu', compile_opts, pipeline_opts)
15+
module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts)
1316
return module
1417

1518

examples/spheres.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1+
import os, sys, logging
12
import optix as ox
23
import cupy as cp
34
import numpy as np
45
from PIL import Image, ImageOps
5-
import logging
6-
import sys
6+
77
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
88
log = logging.getLogger()
9+
10+
script_dir = os.path.dirname(__file__)
11+
cuda_src = os.path.join(script_dir, "cuda", "spheres.cu")
12+
913
img_size = (1024, 768)
1014

1115
def compute_spheres_bbox(centers, radii):
@@ -23,7 +27,7 @@ def create_acceleration_structure(ctx, bboxes):
2327

2428
def create_module(ctx, pipeline_opts):
2529
compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0)
26-
module = ox.Module(ctx, 'cuda/spheres.cu', compile_opts, pipeline_opts)
30+
module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts)
2731
return module
2832

2933

examples/sutil/cuda_output_buffer.py

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import enum
1+
import sys, os, enum
2+
from packaging import version
23

34
import numpy as np
45
import cupy as cp
@@ -7,6 +8,49 @@
78

89
from .vecmath import vtype_to_dtype
910

11+
try:
12+
import cuda as _cuda
13+
from cuda import cudart
14+
has_cudart = True
15+
has_gl_interop = version.parse(_cuda.__version__) >= version.parse("11.6.0")
16+
except ImportError:
17+
cudart = None
18+
has_cudart = False
19+
has_gl_interop = False
20+
21+
_cuda_opengl_interop_msg = (
22+
"Cuda Python low level bindings v11.6.0 or later are required to enable "
23+
f"Cuda/OpenGL interoperability.{os.linesep}You can install the missing package with:"
24+
f"{os.linesep} {sys.executable} -m pip install --upgrade --user cuda-python"
25+
)
26+
27+
if has_cudart:
28+
def format_cudart_err(err):
29+
return (
30+
f"{cudart.cudaGetErrorName(err)[1].decode('utf-8')}({int(err)}): "
31+
f"{cudart.cudaGetErrorString(err)[1].decode('utf-8')}"
32+
)
33+
34+
35+
def check_cudart_err(args):
36+
if isinstance(args, tuple):
37+
assert len(args) >= 1
38+
err = args[0]
39+
if len(args) == 1:
40+
ret = None
41+
elif len(args) == 2:
42+
ret = args[1]
43+
else:
44+
ret = args[1:]
45+
else:
46+
ret = None
47+
48+
assert isinstance(err, cudart.cudaError_t), type(err)
49+
if err != cudart.cudaError_t.cudaSuccess:
50+
raise RuntimeError(format_cudart_err(err))
51+
52+
return ret
53+
1054

1155
class BufferImageFormat(enum.Enum):
1256
UCHAR4=0
@@ -35,11 +79,22 @@ class CudaOutputBufferType(enum.Enum):
3579
ZERO_COPY = 2, # general case, preferred for multi-gpu if not fully nvlink connected
3680
CUDA_P2P = 3, # fully connected only, preferred for fully nvlink connected
3781

82+
@classmethod
83+
def enable_gl_interop(cls, fallback=True):
84+
if has_gl_interop:
85+
return cls.GL_INTEROP
86+
elif fallback:
87+
msg = _cuda_opengl_interop_msg + f"{os.linesep}Falling back to slower CUDA_DEVICE output buffer."
88+
print(msg)
89+
return cls.CUDA_DEVICE
90+
else:
91+
raise RuntimeError(_cuda_opengl_interop_msg)
92+
3893

3994
class CudaOutputBuffer:
4095
__slots__ = ['_pixel_format', '_buffer_type', '_width', '_height',
4196
'_device', '_device_idx', '_device', '_stream',
42-
'_host_buffer', '_device_buffer', '_pbo']
97+
'_host_buffer', '_device_buffer', '_cuda_gfx_ressource', '_pbo']
4398

4499
def __init__(self, buffer_type, pixel_format, width, height, device_idx=0):
45100
for attr in self.__slots__:
@@ -50,6 +105,16 @@ def __init__(self, buffer_type, pixel_format, width, height, device_idx=0):
50105
self.buffer_type = buffer_type
51106
self.resize(width, height)
52107
self.stream = None
108+
109+
if buffer_type is CudaOutputBufferType.GL_INTEROP:
110+
if not has_gl_interop:
111+
raise RuntimeError(_cuda_opengl_interop_msg)
112+
device_count, device_ids = check_cudart_err( cudart.cudaGLGetDevices(1, cudart.cudaGLDeviceList.cudaGLDeviceListAll) )
113+
if device_count <= 0:
114+
raise RuntimeError("No OpenGL device found, cannot enable GL_INTEROP.")
115+
elif device_ids[0] != device_idx:
116+
raise RuntimeError(f"OpenGL device id {device_ids[0]} does not match requested "
117+
f"device index {device_idx} for Cuda/OpenGL interop.")
53118

54119
self._reallocate_buffers()
55120

@@ -69,13 +134,29 @@ def map(self):
69134
self._make_current()
70135
if (self._host_buffer is None) or (self._device_buffer is None):
71136
self._reallocate_buffers()
72-
return self._device_buffer.data.ptr
137+
if self.buffer_type is CudaOutputBufferType.CUDA_DEVICE:
138+
return self._device_buffer.data.ptr
139+
elif self.buffer_type is CudaOutputBufferType.GL_INTEROP:
140+
check_cudart_err(
141+
cudart.cudaGraphicsMapResources(1, self._cuda_gfx_ressource, self._stream.ptr)
142+
)
143+
ptr, size = check_cudart_err(
144+
cudart.cudaGraphicsResourceGetMappedPointer(self._cuda_gfx_ressource)
145+
)
146+
return ptr
147+
else:
148+
msg = f'Buffer type {self.buffer_type} has not been implemented yet.'
149+
raise NotImplementedError(msg)
73150

74151
def unmap(self):
75152
self._make_current()
76153
buffer_type = self.buffer_type
77154
if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
78155
self._stream.synchronize()
156+
elif buffer_type is CudaOutputBufferType.GL_INTEROP:
157+
check_cudart_err(
158+
cudart.cudaGraphicsUnmapResources(1, self._cuda_gfx_ressource, self._stream.ptr)
159+
)
79160
else:
80161
msg = f'Buffer type {buffer_type} has not been implemented yet.'
81162
raise NotImplementedError(msg)
@@ -85,12 +166,13 @@ def get_pbo(self):
85166

86167
self._make_current()
87168

88-
if self._pbo is None:
89-
self._pbo = gl.glGenBuffers(1)
90-
91169
if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
170+
if self._pbo is None:
171+
self._pbo = gl.glGenBuffers(1)
92172
self.copy_device_to_host()
93173
self.copy_host_to_pbo()
174+
elif buffer_type is CudaOutputBufferType.GL_INTEROP:
175+
assert self._pbo is not None
94176
else:
95177
msg = f'Buffer type {buffer_type} has not been implemented yet.'
96178
raise NotImplementedError(msg)
@@ -121,14 +203,26 @@ def _reallocate_buffers(self):
121203

122204
dtype = self.pixel_format
123205
shape = (self.height, self.width)
206+
207+
self._host_buffer = np.empty(shape=shape, dtype=dtype)
124208

125209
if buffer_type is CudaOutputBufferType.CUDA_DEVICE:
126-
self._host_buffer = np.empty(shape=shape, dtype=dtype)
127210
self._device_buffer = cp.empty(shape=shape, dtype=dtype)
128211
if self._pbo is not None:
129212
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo)
130213
gl.glBufferData(gl.GL_ARRAY_BUFFER, self._host_buffer, gl.GL_STREAM_DRAW)
131214
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
215+
elif buffer_type is CudaOutputBufferType.GL_INTEROP:
216+
self._pbo = gl.glGenBuffers(1) if self._pbo is None else self._pbo
217+
218+
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo)
219+
gl.glBufferData(gl.GL_ARRAY_BUFFER, self.width*self.height*dtype.itemsize, None, gl.GL_STREAM_DRAW)
220+
gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0)
221+
222+
self.cuda_gfx_ressource = check_cudart_err(
223+
cudart.cudaGraphicsGLRegisterBuffer(self._pbo,
224+
cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard)
225+
)
132226
else:
133227
msg = f'Buffer type {buffer_type} has not been implemented yet.'
134228
raise NotImplementedError(msg)
@@ -215,3 +309,15 @@ def _set_stream(self, value):
215309
assert isinstance(value, cp.cuda.Stream), type(value)
216310
self._stream = value
217311
stream = property(_get_stream, _set_stream)
312+
313+
def _get_cuda_gfx_ressource(self):
314+
assert self._cuda_gfx_ressource is not None
315+
return self._cuda_gfx_ressource
316+
def _set_cuda_gfx_ressource(self, value):
317+
if (self._cuda_gfx_ressource is not None) and (self._cuda_gfx_ressource != value):
318+
check_cudart_err(
319+
cudart.cudaGraphicsUnregisterResource(self._cuda_gfx_ressource)
320+
)
321+
self._cuda_gfx_ressource = value
322+
323+
cuda_gfx_ressource = property(_get_cuda_gfx_ressource, _set_cuda_gfx_ressource)

examples/sutil/gl_display.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ class GLDisplay:
4949
'_quad_vertex_buffer', '_image_format']
5050

5151
def __init__(self, image_format):
52-
print(image_format, type(image_format), isinstance(BufferImageFormat.UCHAR4, BufferImageFormat))
5352
assert isinstance(image_format, BufferImageFormat)
5453

5554
vertex_array = gl.glGenVertexArrays(1)

examples/triangle.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
import os
12
import optix as ox
23
import cupy as cp
34
import numpy as np
45
from PIL import Image, ImageOps
56

7+
script_dir = os.path.dirname(__file__)
8+
cuda_src = os.path.join(script_dir, "cuda", "triangle.cu")
9+
610
img_size = (1024, 768)
711

812
# use a regular function for logging
@@ -19,7 +23,7 @@ def create_acceleration_structure(ctx, vertices):
1923

2024
def create_module(ctx, pipeline_opts):
2125
compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0)
22-
module = ox.Module(ctx, 'cuda/triangle.cu', compile_opts, pipeline_opts)
26+
module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts)
2327
return module
2428

2529

optix/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.0"
1+
__version__ = "0.1.1"

optix/optix_includes.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
#pragma once
22

3+
#if defined(_MSC_VER)
4+
#define NOMINMAX
5+
#endif
6+
37
#include <optix.h>
48
#include <optix_stubs.h>
59
#include <optix_function_table_definition.h>
@@ -13,4 +17,4 @@ inline void optix_check_return(OptixResult result) {
1317
ss << ": " << optixGetErrorString(result);
1418
throw std::runtime_error(ss.str());
1519
}
16-
}
20+
}

0 commit comments

Comments
 (0)