arrayfire · syurkevi · Sep 18, 2025 · Sep 18, 2025 · Sep 18, 2025
diff --git a/LICENSE b/LICENSE
diff --git a/benchmarks/src/graphs.py b/benchmarks/src/graphs.py
@@ -85,6 +85,7 @@
     if name not in TESTS_GRAPH_NAME:
         TESTS_GRAPH_NAME[name] = name
 
+
 def get_benchmark_data():
     results = {}
     descriptions = {}

diff --git a/benchmarks/src/pytest_benchmark/common.py b/benchmarks/src/pytest_benchmark/common.py
@@ -104,4 +104,4 @@ def initialize_package(PKG_ID):
     elif PKG_ID == "cupynumeric":
         pass
     else:
-        raise NotImplementedError()
+        raise NotImplementedError()
diff --git a/benchmarks/src/pytest_benchmark/test_blackscholes.py b/benchmarks/src/pytest_benchmark/test_blackscholes.py
@@ -92,6 +92,7 @@ def cnd(x):
 
     return (C, P)
 
+
 def black_scholes_cupynumeric(S, X, R, V, T):
     # S = Underlying stock price
     # X = Strike Price
@@ -116,6 +117,7 @@ def cnd(x):
 
     return (C, P)
 
+
 def black_scholes_arrayfire(S, X, R, V, T):
     def cnd(x):
         temp = x > 0
@@ -172,5 +174,5 @@ def generate_arrays(pkgid, count):
     "numpy": black_scholes_numpy,
     "cupy": black_scholes_cupy,
     "arrayfire": black_scholes_arrayfire,
-    "cupynumeric": black_scholes_cupynumeric
+    "cupynumeric": black_scholes_cupynumeric,
 }
diff --git a/benchmarks/src/pytest_benchmark/test_fft.py b/benchmarks/src/pytest_benchmark/test_fft.py
@@ -89,7 +89,9 @@ def fft_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return res
 
+
 def fft_cupynumeric(arr):
     return cupynumeric.fft.fft(arr)
 
+
 FUNCS = {"dpnp": fft_dpnp, "numpy": fft_np, "cupy": fft_cupy, "arrayfire": fft_af, "cupynumeric": fft_cupynumeric}
diff --git a/benchmarks/src/pytest_benchmark/test_gemm.py b/benchmarks/src/pytest_benchmark/test_gemm.py
@@ -120,7 +120,9 @@ def gemm_cupy(A, B, C):
     cupy.cuda.runtime.deviceSynchronize()
     return C
 
+
 def gemm_cupynumeric(A, B, C):
     return alpha * cupynumeric.matmul(A, B) + beta * C
 
+
 FUNCS = {"numpy": gemm_np, "cupy": gemm_cupy, "arrayfire": gemm_af, "dpnp": gemm_dpnp, "cupynumeric": gemm_cupynumeric}
diff --git a/benchmarks/src/pytest_benchmark/test_kmeans.py b/benchmarks/src/pytest_benchmark/test_kmeans.py
@@ -12,8 +12,13 @@ class TestKmeans:
     def test_kmeans(self, benchmark, pkgid):
         initialize_package(pkgid)
         pkg = PKGDICT[pkgid]
-        kmean_class = {"dpnp": kmeans_dpnp, "numpy": kmeans_numpy, "cupy": kmeans_cupy, "arrayfire": kmeans_af,
-        "cupynumeric": kmeans_cupynumeric}
+        kmean_class = {
+            "dpnp": kmeans_dpnp,
+            "numpy": kmeans_numpy,
+            "cupy": kmeans_cupy,
+            "arrayfire": kmeans_af,
+            "cupynumeric": kmeans_cupynumeric,
+        }
         obj = kmean_class[pkg.__name__]()
 
         benchmark.extra_info["description"] = f"{NSAMPLES}x{NFEATURES} over {K} centers"
@@ -190,7 +195,6 @@ def kmeans(self):
         return centroids, cluster_assignments
 
 
-
 class kmeans_cupynumeric:
     def __init__(self):
         self.data = cupynumeric.random.random((NSAMPLES, NFEATURES))
@@ -221,7 +225,9 @@ def assign_to_clusters(self, centroids):
         Returns:
             np.ndarray: An array of cluster assignments for each data point (n_samples,).
         """
-        distances = cupynumeric.sqrt(((self.data[:, cupynumeric.newaxis, :] - centroids[cupynumeric.newaxis, :, :]) ** 2).sum(axis=2))
+        distances = cupynumeric.sqrt(
+            ((self.data[:, cupynumeric.newaxis, :] - centroids[cupynumeric.newaxis, :, :]) ** 2).sum(axis=2)
+        )
         cluster_assignments = cupynumeric.argmin(distances, axis=1)
         return cluster_assignments
 

diff --git a/benchmarks/src/pytest_benchmark/test_linalg.py b/benchmarks/src/pytest_benchmark/test_linalg.py
@@ -93,9 +93,11 @@ def svd_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def svd_cupynumeric(arr):
     return cupynumeric.linalg.svd(arr)
 
+
 def qr_np(arr):
     return np.linalg.qr(arr)
 
@@ -117,9 +119,11 @@ def qr_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def qr_cupynumeric(arr):
     return cupynumeric.linalg.qr(arr)
 
+
 def cholesky_np(arr):
     return np.linalg.cholesky(arr)
 
@@ -140,6 +144,7 @@ def cholesky_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def cholesky_cupynumeric(arr):
     return cupynumeric.linalg.cholesky(arr)
 
@@ -164,9 +169,11 @@ def inv_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def inv_cupynumeric(arr):
     return cupynumeric.linalg.inv(arr)
 
+
 def det_np(arr):
     return np.linalg.det(arr)
 
@@ -186,9 +193,11 @@ def det_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def det_cupynumeric(arr):
     return cupynumeric.linalg.det(arr)
 
+
 def norm_np(arr):
     return np.linalg.norm(arr)
 
@@ -208,9 +217,11 @@ def norm_cupy(arr):
     cupy.cuda.runtime.deviceSynchronize()
     return x
 
+
 def norm_cupynumeric(arr):
     return cupynumeric.linalg.norm(arr)
 
+
 @pytest.mark.parametrize("pkgid", IDS, ids=IDS)
 class TestLinalg:
     def test_cholesky(self, benchmark, pkgid):
@@ -220,8 +231,13 @@ def test_cholesky(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        CHOLESKY_FUNCS = {"numpy": cholesky_np, "cupy": cholesky_cupy, "arrayfire": cholesky_af, "dpnp": cholesky_dpnp, 
-            "cupynumeric": cholesky_cupynumeric }
+        CHOLESKY_FUNCS = {
+            "numpy": cholesky_np,
+            "cupy": cholesky_cupy,
+            "arrayfire": cholesky_af,
+            "dpnp": cholesky_dpnp,
+            "cupynumeric": cholesky_cupynumeric,
+        }
         result = benchmark.pedantic(
             target=CHOLESKY_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS
         )
@@ -233,8 +249,13 @@ def test_svd(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        SVD_FUNCS = {"numpy": svd_np, "cupy": svd_cupy, "arrayfire": svd_af, "dpnp": svd_dpnp, 
-            "cupynumeric": svd_cupynumeric }
+        SVD_FUNCS = {
+            "numpy": svd_np,
+            "cupy": svd_cupy,
+            "arrayfire": svd_af,
+            "dpnp": svd_dpnp,
+            "cupynumeric": svd_cupynumeric,
+        }
         result = benchmark.pedantic(target=SVD_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS)
 
     def test_qr(self, benchmark, pkgid):
@@ -244,8 +265,13 @@ def test_qr(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        QR_FUNCS = {"numpy": qr_np, "cupy": qr_cupy, "arrayfire": qr_af, "dpnp": qr_dpnp, 
-            "cupynumeric": qr_cupynumeric }
+        QR_FUNCS = {
+            "numpy": qr_np,
+            "cupy": qr_cupy,
+            "arrayfire": qr_af,
+            "dpnp": qr_dpnp,
+            "cupynumeric": qr_cupynumeric,
+        }
         result = benchmark.pedantic(target=QR_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS)
 
     def test_inv(self, benchmark, pkgid):
@@ -255,8 +281,13 @@ def test_inv(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        INV_FUNCS = {"numpy": inv_np, "cupy": inv_cupy, "arrayfire": inv_af, "dpnp": inv_dpnp, 
-            "cupynumeric": inv_cupynumeric }
+        INV_FUNCS = {
+            "numpy": inv_np,
+            "cupy": inv_cupy,
+            "arrayfire": inv_af,
+            "dpnp": inv_dpnp,
+            "cupynumeric": inv_cupynumeric,
+        }
         result = benchmark.pedantic(target=INV_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS)
 
     def test_det(self, benchmark, pkgid):
@@ -266,8 +297,13 @@ def test_det(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        DET_FUNCS = {"numpy": det_np, "cupy": det_cupy, "arrayfire": det_af, "dpnp": det_dpnp, 
-            "cupynumeric": det_cupynumeric }
+        DET_FUNCS = {
+            "numpy": det_np,
+            "cupy": det_cupy,
+            "arrayfire": det_af,
+            "dpnp": det_dpnp,
+            "cupynumeric": det_cupynumeric,
+        }
         result = benchmark.pedantic(target=DET_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS)
 
     def test_norm(self, benchmark, pkgid):
@@ -277,6 +313,11 @@ def test_norm(self, benchmark, pkgid):
         benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
         pkg = PKGDICT[pkgid]
 
-        NORM_FUNCS = {"numpy": norm_np, "cupy": norm_cupy, "arrayfire": norm_af, "dpnp": norm_dpnp, 
-            "cupynumeric": norm_cupynumeric }
+        NORM_FUNCS = {
+            "numpy": norm_np,
+            "cupy": norm_cupy,
+            "arrayfire": norm_af,
+            "dpnp": norm_dpnp,
+            "cupynumeric": norm_cupynumeric,
+        }
         result = benchmark.pedantic(target=NORM_FUNCS[pkg.__name__], setup=setup, rounds=ROUNDS, iterations=ITERATIONS)
diff --git a/benchmarks/src/pytest_benchmark/test_mandelbrot.py b/benchmarks/src/pytest_benchmark/test_mandelbrot.py
@@ -142,6 +142,7 @@ def mandelbrot_cupy():
     cupy.cuda.runtime.deviceSynchronize()
     return Z_, N_
 
+
 def mandelbrot_cupynumeric():
     # Adapted from
     # https://thesamovar.wordpress.com/2009/03/22/fast-fractals-with-python-and-numpy/
@@ -176,6 +177,7 @@ def mandelbrot_cupynumeric():
 
     return Z_.T, N_.T
 
+
 def mandelbrot_af():
     Xi = af.flat(af.range((xn, yn), axis=0, dtype=af.int64))
     Yi = af.flat(af.range((xn, yn), axis=1, dtype=af.int64))
@@ -221,4 +223,10 @@ def mandelbrot_af():
     return Z_, N_
 
 
-FUNCS = {"dpnp": mandelbrot_dpnp, "numpy": mandelbrot_np, "cupy": mandelbrot_cupy, "arrayfire": mandelbrot_af, "cupynumeric" : mandelbrot_cupynumeric}
+FUNCS = {
+    "dpnp": mandelbrot_dpnp,
+    "numpy": mandelbrot_np,
+    "cupy": mandelbrot_cupy,
+    "arrayfire": mandelbrot_af,
+    "cupynumeric": mandelbrot_cupynumeric,
+}
diff --git a/benchmarks/src/pytest_benchmark/test_montecarlo_pi.py b/benchmarks/src/pytest_benchmark/test_montecarlo_pi.py
@@ -47,11 +47,17 @@ def calc_pi_dpnp(samples):
     y = dpnp.random.rand(samples).astype(dpnp.float32)
     return 4.0 * dpnp.sum(in_circle(x, y)) / samples
 
+
 def calc_pi_cupynumeric(samples):
     x = cupynumeric.random.rand(samples).astype(cupynumeric.float32)
     y = cupynumeric.random.rand(samples).astype(cupynumeric.float32)
     return 4.0 * cupynumeric.sum(in_circle(x, y)) / samples
 
 
-FUNCS = {"dpnp": calc_pi_dpnp, "numpy": calc_pi_numpy, "cupy": calc_pi_cupy, "arrayfire": calc_pi_af,
- "cupynumeric": calc_pi_cupynumeric }
+FUNCS = {
+    "dpnp": calc_pi_dpnp,
+    "numpy": calc_pi_numpy,
+    "cupy": calc_pi_cupy,
+    "arrayfire": calc_pi_af,
+    "cupynumeric": calc_pi_cupynumeric,
+}
diff --git a/benchmarks/src/pytest_benchmark/test_nn.py b/benchmarks/src/pytest_benchmark/test_nn.py
@@ -282,6 +282,7 @@ def train(self):
     def predict(self, X):
         return cupy.argmax(self.forward(X), axis=1)
 
+
 class NeuralNetwork_cupynumeric:
     def __init__(self):
         self.input_size = INPUT_SIZE
@@ -293,13 +294,16 @@ def __init__(self):
         # He initialization (for ReLU) is often a good choice
         self.W1 = cupynumeric.random.randn(self.input_size, self.hidden_size) * cupynumeric.sqrt(2.0 / self.input_size)
         self.b1 = cupynumeric.zeros((1, self.hidden_size))
-        self.W2 = cupynumeric.random.randn(self.hidden_size, self.output_size) * cupynumeric.sqrt(2.0 / self.hidden_size)
+        self.W2 = cupynumeric.random.randn(self.hidden_size, self.output_size) * cupynumeric.sqrt(
+            2.0 / self.hidden_size
+        )
         self.b2 = cupynumeric.zeros((1, self.output_size))
 
         self.X_train = cupynumeric.random.rand(SAMPLES, INPUT_SIZE)
         self.y_train = cupynumeric.zeros((SAMPLES * OUTPUT_SIZE))
         self.y_train[
-            cupynumeric.arange(SAMPLES) * OUTPUT_SIZE + cupynumeric.floor(cupynumeric.random.rand(SAMPLES) * OUTPUT_SIZE).astype(int)
+            cupynumeric.arange(SAMPLES) * OUTPUT_SIZE
+            + cupynumeric.floor(cupynumeric.random.rand(SAMPLES) * OUTPUT_SIZE).astype(int)
         ] = 1
         self.y_train = self.y_train.reshape((SAMPLES, OUTPUT_SIZE))
 
@@ -310,7 +314,9 @@ def relu_derivative(self, x):
         return (x > 0).astype(float)
 
     def softmax(self, x):
-        exp_scores = cupynumeric.exp(x - cupynumeric.max(x, axis=1, keepdims=True))  # Subtract max for numerical stability
+        exp_scores = cupynumeric.exp(
+            x - cupynumeric.max(x, axis=1, keepdims=True)
+        )  # Subtract max for numerical stability
         return exp_scores / cupynumeric.sum(exp_scores, axis=1, keepdims=True)
 
     def forward(self, X):
@@ -364,6 +370,7 @@ def train(self):
     def predict(self, X):
         return cupynumeric.argmax(self.forward(X), axis=1)
 
+
 class NeuralNetwork_af:
     def __init__(self):
         self.input_size = INPUT_SIZE