From f854299c19778752f2b889b4a8cb310deb8dc47c Mon Sep 17 00:00:00 2001
From: hyperfraise <damien.menigaux@polytechnique.edu>
Date: Tue, 26 Apr 2022 17:46:16 +0200
Subject: [PATCH 1/3] add stream priority range function

---
 doc/driver.rst               |  4 ++++
 src/cpp/cuda.hpp             | 11 ++++++++++-
 src/wrapper/wrap_cudadrv.cpp |  3 +++
 3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/doc/driver.rst b/doc/driver.rst
index 288ed7cb..528b5297 100644
--- a/doc/driver.rst
+++ b/doc/driver.rst
@@ -644,6 +644,10 @@ Devices and Contexts
 
     See also :mod:`pycuda.autoinit`.
 
+.. function:: get_stream_priority_range()
+
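+    Returns a tuple ``(least_priority, greatest_priority)`` giving the numerical bounds of the stream priorities available in the current context. Following the CUDA convention, lower numbers denote higher priorities.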
+
 .. class:: Device(number)
         Device(pci_bus_id)
 
diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index 655f595b..fc4481d6 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -531,7 +531,6 @@ namespace pycuda
    * to push contexts that are already active at a deeper stack level, so we
    * maintain all contexts floating other than the top one.
    */
-
   // for friend decl
   namespace gl {
     boost::shared_ptr<context>
@@ -862,6 +861,16 @@ namespace pycuda
     return result;
   }
 
+  inline
+  py::tuple get_stream_priority_range()
+  {
+    int leastPriority;     // first tuple element; written by the driver call below
+    int greatestPriority;  // second tuple element; written by the driver call below
+    CUDAPP_CALL_GUARDED(cuCtxGetStreamPriorityRange, (&leastPriority, &greatestPriority));
+    return py::make_tuple(leastPriority, greatestPriority);
+  }
+
+
 
 #if CUDAPP_CUDA_VERSION >= 7000
   inline boost::shared_ptr<context> device::retain_primary_context()
diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp
index 3dc33c8a..4a86209c 100644
--- a/src/wrapper/wrap_cudadrv.cpp
+++ b/src/wrapper/wrap_cudadrv.cpp
@@ -1193,6 +1193,9 @@ BOOST_PYTHON_MODULE(_driver)
       .add_property("handle", &cl::handle_int)
       ;
   }
+#if CUDAPP_CUDA_VERSION >= 7500  // same guard as the get_stream_priority_range definition in cuda.hpp
+  DEF_SIMPLE_FUNCTION(get_stream_priority_range);
+#endif
   // }}}
 
   // {{{ stream

From 8187116caf6a19416d058f00d3758a64742cca68 Mon Sep 17 00:00:00 2001
From: hyperfraise <damien.menigaux@polytechnique.edu>
Date: Tue, 26 Apr 2022 17:46:20 +0200
Subject: [PATCH 2/3] add ci test

---
 test/test_driver.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/test/test_driver.py b/test/test_driver.py
index 98f3c8aa..6218f00a 100644
--- a/test/test_driver.py
+++ b/test/test_driver.py
@@ -935,6 +935,29 @@ def test_register_host_memory(self):
         drv.memcpy_htod_async(gpu_ary, a_pin, stream)
         drv.Context.synchronize()
 
+    @mark_cuda_test
+    def test_stream_priority_setting(self):
+        """Run an async host-to-device copy on a stream created with an explicit priority."""
+        if drv.get_version() < (4,):
+            from py.test import skip
+            skip("register_host_memory only exists on CUDA 4.0 and later")
+
+        import sys
+        if sys.platform == "darwin":
+            from py.test import skip
+            skip("register_host_memory is not supported on OS X")
+
+        a = drv.aligned_empty((2 ** 20,), np.float64)
+        a_pin = drv.register_host_memory(a)
+        gpu_ary = drv.mem_alloc_like(a)
+
+        # The range comes back as (least, greatest); CUDA uses lower numbers for higher
+        # priorities, so range(least, greatest) is empty. Use an end point, as a plain int.
+        least_priority, greatest_priority = drv.get_stream_priority_range()
+        stream = drv.Stream(priority=int(greatest_priority))
+        drv.memcpy_htod_async(gpu_ary, a_pin, stream)
+        drv.Context.synchronize()
+
     @mark_cuda_test
     # https://github.com/inducer/pycuda/issues/45
     def test_recursive_launch(self):

From e0dacb1ca486cc8b3f821496bb054a7c5ef9396c Mon Sep 17 00:00:00 2001
From: hyperfraise <damien.menigaux@polytechnique.edu>
Date: Tue, 26 Apr 2022 17:46:27 +0200
Subject: [PATCH 3/3] account for old versions of cuda

---
 src/cpp/cuda.hpp | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp
index fc4481d6..c98f056f 100644
--- a/src/cpp/cuda.hpp
+++ b/src/cpp/cuda.hpp
@@ -861,6 +861,7 @@ namespace pycuda
     return result;
   }
 
+#if CUDAPP_CUDA_VERSION >= 7500
   inline
   py::tuple get_stream_priority_range()
   {
@@ -869,6 +870,7 @@ namespace pycuda
     CUDAPP_CALL_GUARDED(cuCtxGetStreamPriorityRange, (&leastPriority, &greatestPriority));
     return py::make_tuple(leastPriority, greatestPriority);
   }
+#endif
 
 
 
@@ -1006,8 +1008,17 @@ namespace pycuda
       CUstream m_stream;
 
     public:
-      stream(unsigned int flags=0, int priority=0)
-      { CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority)); }
+      // Takes `priority` on every CUDA version so the constructor signature stays stable.
+      stream(unsigned int flags=0, int priority=0)
+      {
+#if CUDAPP_CUDA_VERSION >= 7500
+        CUDAPP_CALL_GUARDED(cuStreamCreateWithPriority, (&m_stream, flags, priority));
+#else  // plain cuStreamCreate takes no priority argument, so only the default (0) is accepted
+        if (priority != 0) throw pycuda::error("stream", CUDA_ERROR_INVALID_HANDLE,
+            "priority!=0 setting isn't supported for your CUDA version");
+        CUDAPP_CALL_GUARDED(cuStreamCreate, (&m_stream, flags));
+#endif
+      }
 
       ~stream()
       {