1- import enum
1+ import sys , os , enum
2+ from packaging import version
23
34import numpy as np
45import cupy as cp
78
89from .vecmath import vtype_to_dtype
910
# Optional dependency: the CUDA Python low level bindings. The module stays
# importable without them; the flags below record what can be used.
try:
    import cuda as _cuda
    from cuda import cudart
except ImportError:
    cudart = None
    has_cudart = False
    has_gl_interop = False
else:
    has_cudart = True
    # The interop code path is only enabled for bindings v11.6.0 or later.
    has_gl_interop = version.parse(_cuda.__version__) >= version.parse("11.6.0")

# Hint reported whenever Cuda/OpenGL interop is requested but not available.
_cuda_opengl_interop_msg = (
    "Cuda Python low level bindings v11.6.0 or later are required to enable "
    f"Cuda/OpenGL interoperability.{os.linesep} You can install the missing package with:"
    f"{os.linesep} {sys.executable} -m pip install --upgrade --user cuda-python"
)
26+
if has_cudart:
    def format_cudart_err(err):
        """Return a readable description of a CUDA runtime error code.

        The text combines the error name, its integer value and the error
        string, each looked up through ``cudart``.
        """
        return (
            f"{cudart.cudaGetErrorName(err)[1].decode('utf-8')} ({int(err)} ): "
            f"{cudart.cudaGetErrorString(err)[1].decode('utf-8')} "
        )


    def check_cudart_err(args):
        """Check the status of a ``cudart`` call and return its payload.

        Parameters:
            args: either a tuple whose first element is the
                ``cudart.cudaError_t`` status followed by the returned
                values, or a bare ``cudart.cudaError_t`` status.

        Returns:
            ``None`` when there is no payload, the single value when there is
            exactly one, and a tuple of the remaining values otherwise.

        Raises:
            RuntimeError: if the status is not ``cudaSuccess``.
        """
        if isinstance(args, tuple):
            assert len(args) >= 1
            err = args[0]
            if len(args) == 1:
                ret = None
            elif len(args) == 2:
                ret = args[1]
            else:
                ret = args[1:]
        else:
            # A bare status code carries no payload; it must still be bound
            # to ``err`` so the checks below can inspect it.
            err = args
            ret = None

        assert isinstance(err, cudart.cudaError_t), type(err)
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError(format_cudart_err(err))

        return ret
53+
1054
1155class BufferImageFormat (enum .Enum ):
1256 UCHAR4 = 0
@@ -35,11 +79,22 @@ class CudaOutputBufferType(enum.Enum):
3579 ZERO_COPY = 2 , # general case, preferred for multi-gpu if not fully nvlink connected
3680 CUDA_P2P = 3 , # fully connected only, preferred for fully nvlink connected
3781
82+ @classmethod
83+ def enable_gl_interop (cls , fallback = True ):
84+ if has_gl_interop :
85+ return cls .GL_INTEROP
86+ elif fallback :
87+ msg = _cuda_opengl_interop_msg + f"{ os .linesep } Falling back to slower CUDA_DEVICE output buffer."
88+ print (msg )
89+ return cls .CUDA_DEVICE
90+ else :
91+ raise RuntimeError (_cuda_opengl_interop_msg )
92+
3893
3994class CudaOutputBuffer :
4095 __slots__ = ['_pixel_format' , '_buffer_type' , '_width' , '_height' ,
4196 '_device' , '_device_idx' , '_device' , '_stream' ,
42- '_host_buffer' , '_device_buffer' , '_pbo' ]
97+ '_host_buffer' , '_device_buffer' , '_cuda_gfx_ressource' , ' _pbo' ]
4398
4499 def __init__ (self , buffer_type , pixel_format , width , height , device_idx = 0 ):
45100 for attr in self .__slots__ :
@@ -50,6 +105,16 @@ def __init__(self, buffer_type, pixel_format, width, height, device_idx=0):
50105 self .buffer_type = buffer_type
51106 self .resize (width , height )
52107 self .stream = None
108+
109+ if buffer_type is CudaOutputBufferType .GL_INTEROP :
110+ if not has_gl_interop :
111+ raise RuntimeError (_cuda_opengl_interop_msg )
112+ device_count , device_ids = check_cudart_err ( cudart .cudaGLGetDevices (1 , cudart .cudaGLDeviceList .cudaGLDeviceListAll ) )
113+ if device_count <= 0 :
114+ raise RuntimeError ("No OpenGL device found, cannot enable GL_INTEROP." )
115+ elif device_ids [0 ] != device_idx :
116+ raise RuntimeError (f"OpenGL device id { device_ids [0 ]} does not match requested "
117+ f"device index { device_idx } for Cuda/OpenGL interop." )
53118
54119 self ._reallocate_buffers ()
55120
@@ -69,13 +134,29 @@ def map(self):
69134 self ._make_current ()
70135 if (self ._host_buffer is None ) or (self ._device_buffer is None ):
71136 self ._reallocate_buffers ()
72- return self ._device_buffer .data .ptr
137+ if self .buffer_type is CudaOutputBufferType .CUDA_DEVICE :
138+ return self ._device_buffer .data .ptr
139+ elif self .buffer_type is CudaOutputBufferType .GL_INTEROP :
140+ check_cudart_err (
141+ cudart .cudaGraphicsMapResources (1 , self ._cuda_gfx_ressource , self ._stream .ptr )
142+ )
143+ ptr , size = check_cudart_err (
144+ cudart .cudaGraphicsResourceGetMappedPointer (self ._cuda_gfx_ressource )
145+ )
146+ return ptr
147+ else :
148+ msg = f'Buffer type { self .buffer_type } has not been implemented yet.'
149+ raise NotImplementedError (msg )
73150
74151 def unmap (self ):
75152 self ._make_current ()
76153 buffer_type = self .buffer_type
77154 if buffer_type is CudaOutputBufferType .CUDA_DEVICE :
78155 self ._stream .synchronize ()
156+ elif buffer_type is CudaOutputBufferType .GL_INTEROP :
157+ check_cudart_err (
158+ cudart .cudaGraphicsUnmapResources (1 , self ._cuda_gfx_ressource , self ._stream .ptr )
159+ )
79160 else :
80161 msg = f'Buffer type { buffer_type } has not been implemented yet.'
81162 raise NotImplementedError (msg )
@@ -85,12 +166,13 @@ def get_pbo(self):
85166
86167 self ._make_current ()
87168
88- if self ._pbo is None :
89- self ._pbo = gl .glGenBuffers (1 )
90-
91169 if buffer_type is CudaOutputBufferType .CUDA_DEVICE :
170+ if self ._pbo is None :
171+ self ._pbo = gl .glGenBuffers (1 )
92172 self .copy_device_to_host ()
93173 self .copy_host_to_pbo ()
174+ elif buffer_type is CudaOutputBufferType .GL_INTEROP :
175+ assert self ._pbo is not None
94176 else :
95177 msg = f'Buffer type { buffer_type } has not been implemented yet.'
96178 raise NotImplementedError (msg )
@@ -121,14 +203,26 @@ def _reallocate_buffers(self):
121203
122204 dtype = self .pixel_format
123205 shape = (self .height , self .width )
206+
207+ self ._host_buffer = np .empty (shape = shape , dtype = dtype )
124208
125209 if buffer_type is CudaOutputBufferType .CUDA_DEVICE :
126- self ._host_buffer = np .empty (shape = shape , dtype = dtype )
127210 self ._device_buffer = cp .empty (shape = shape , dtype = dtype )
128211 if self ._pbo is not None :
129212 gl .glBindBuffer (gl .GL_ARRAY_BUFFER , self ._pbo )
130213 gl .glBufferData (gl .GL_ARRAY_BUFFER , self ._host_buffer , gl .GL_STREAM_DRAW )
131214 gl .glBindBuffer (gl .GL_ARRAY_BUFFER , 0 )
215+ elif buffer_type is CudaOutputBufferType .GL_INTEROP :
216+ self ._pbo = gl .glGenBuffers (1 ) if self ._pbo is None else self ._pbo
217+
218+ gl .glBindBuffer (gl .GL_ARRAY_BUFFER , self ._pbo )
219+ gl .glBufferData (gl .GL_ARRAY_BUFFER , self .width * self .height * dtype .itemsize , None , gl .GL_STREAM_DRAW )
220+ gl .glBindBuffer (gl .GL_ARRAY_BUFFER , 0 )
221+
222+ self .cuda_gfx_ressource = check_cudart_err (
223+ cudart .cudaGraphicsGLRegisterBuffer (self ._pbo ,
224+ cudart .cudaGraphicsRegisterFlags .cudaGraphicsRegisterFlagsWriteDiscard )
225+ )
132226 else :
133227 msg = f'Buffer type { buffer_type } has not been implemented yet.'
134228 raise NotImplementedError (msg )
@@ -215,3 +309,15 @@ def _set_stream(self, value):
215309 assert isinstance (value , cp .cuda .Stream ), type (value )
216310 self ._stream = value
217311 stream = property (_get_stream , _set_stream )
312+
313+ def _get_cuda_gfx_ressource (self ):
314+ assert self ._cuda_gfx_ressource is not None
315+ return self ._cuda_gfx_ressource
316+ def _set_cuda_gfx_ressource (self , value ):
317+ if (self ._cuda_gfx_ressource is not None ) and (self ._cuda_gfx_ressource != value ):
318+ check_cudart_err (
319+ cudart .cudaGraphicsUnregisterResource (self ._cuda_gfx_ressource )
320+ )
321+ self ._cuda_gfx_ressource = value
322+
323+ cuda_gfx_ressource = property (_get_cuda_gfx_ressource , _set_cuda_gfx_ressource )
0 commit comments