Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions benchmarks/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,18 @@ Benchmarks

## Setting up environment

Create a Python environment and install pytest and the compute libraries:
```sh
python -m pip install -r requirements.txt
```

If running `dpnp` on NVIDIA or AMD devices, you must install the oneAPI toolkit along with the corresponding oneAPI plugin:

```sh
# install oneapi toolkit and plugins
source /opt/intel/oneapi/setvars.sh
```

## Benchmark parameters

The benchmark packages, rounds, array sizes, and numeric type may be set via the constants at the top of [pytest_benchmark/common.py](pytest_benchmark/common.py).
Expand All @@ -20,16 +28,18 @@ These are the steps to run the benchmarks, and produce the graphs

Run the benchmarks and store the results in `results.json`
```sh
pytest .\pytest_benchmark --benchmark-json=results.json
pytest ./pytest_benchmark --benchmark-json=results.json
```

To create graphs and store the timing results after creating the `results.json`, run:
```sh
mkdir img
python graphs.py
```

To modify the tests being shown, modify the `TESTS` list at the top of the `graphs.py` file.
To modify the labels shown, modify `PKG_LABELS`
To modify the legend of the package labels shown, modify `PKG_LABELS`.
To modify the name of the tests shown, modify `TESTS_GRAPH_NAME`.
To modify the hardware display, modify `HARDWARE`.

## Notes
Expand Down
39 changes: 30 additions & 9 deletions benchmarks/src/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
BENCHMARKS_JSON = "results.json"

# Hardware details shown in title
HARDWARE = "AMD Ryzen 9 9900X 12-Core Processor 63032 MB (fp64 fp16)\noneAPI 2025.1.3 Intel(R) OpenCL Graphics: Intel(R) Arc(TM) B580 Graphics, 11873 MB (fp64 fp16)"
HARDWARE = "Intel Xeon Gold 5315Y (8 Processors) @ 3.201GHz 63032 MB\noneAPI 2025.2.1 NVIDIA RTX A4000, 16222 MB, CUDA 12.8 Compute 8.6"

# Show speedup in graph
SHOW_NUMBERS = True
Expand All @@ -16,12 +16,13 @@
ROUND_NUMBERS = 1

# package list in graph order; arrayfire packages are added later
PKG_NAMES = ["numpy", "dpnp", "cupy"]
PKG_NAMES = ["numpy", "dpnp", "cupy", "cupynumeric"]

# color used in graphs
PKG_COLOR = {
"numpy": "tab:blue",
"cupy": "tab:green",
"cupynumeric": "green",
"dpnp": "tab:red",
"afcpu": "tab:orange",
"afopencl": "tab:orange",
Expand All @@ -32,8 +33,9 @@
# labels displayed in the graph
PKG_LABELS = {
"numpy": "numpy[cpu]",
"dpnp": "dpnp[level_zero:gpu]",
"dpnp": "dpnp[cuda:gpu]",
"cupy": "cupy",
"cupynumeric": "cupynumeric",
"afcpu": "afcpu",
"afcuda": "afcuda",
"afopencl": "afopencl[opencl:gpu]",
Expand All @@ -44,16 +46,16 @@

# Tests to be shown in graphs
TESTS = [
"qr",
"group_elementwise",
"neural_network",
"gemm",
"black_scholes",
"mandelbrot",
"nbody",
"pi",
"black_scholes",
"fft",
"normal",
"group_elementwise",
"gemm",
"fft",
"qr",
# Other tests
# 'svd',
# 'cholesky',
Expand All @@ -63,6 +65,25 @@
# 'inv'
]

# Reverse list so it appears in order on graph
TESTS.reverse()

# Display names used for the tests on the graph axis, keyed by test identifier
TESTS_GRAPH_NAME = {
    "group_elementwise": "Group_elementwise (JIT)",
    "neural_network": "Neural Network (JIT)",
    "black_scholes": "Black Scholes (JIT)",
    "mandelbrot": "Mandelbrot (JIT)",
    "nbody": "Nbody (JIT)",
    "pi": "Montecarlo Pi (JIT)",
    "normal": "Normal Distribution",
    "gemm": "General Matrix Multiplication",
    "fft": "2D FFT",
    "qr": "QR Decomposition",
}

# Any test without an explicit display name falls back to its own identifier
for name in TESTS:
    TESTS_GRAPH_NAME.setdefault(name, name)

def get_benchmark_data():
results = {}
Expand Down Expand Up @@ -189,7 +210,7 @@ def generate_group_graph(test_list=None, show_numbers=False, filename="compariso

xlabels = []
for test in tests:
xlabels.append(test + "\n" + descriptions[test])
xlabels.append(TESTS_GRAPH_NAME[test] + "\n" + descriptions[test])

ax.set_xlabel("Speedup")
ax.set_xscale("log")
Expand Down
20 changes: 11 additions & 9 deletions benchmarks/src/pytest_benchmark/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import math

import cupy
import cupynumeric
import dpctl
import dpnp
import numpy as np
Expand All @@ -38,19 +39,19 @@

# modify parameters for most benchmarks
ROUNDS = 30
NSIZE = 2**13
NSIZE = 2**11
NNSIZE = NSIZE**2
DTYPE = "float32"

# comment a line to remove that package from testing
PKGDICT = {
"dpnp": dpnp,
"numpy": np,
"cupy": cupy,
# "afcpu": af,
"afopencl": af,
"afcuda": af,
"afoneapi": af,
"dpnp": dpnp,
"cupynumeric": cupynumeric,
}

PKGS = []
Expand All @@ -66,11 +67,13 @@ def initialize_package(PKG_ID):
pkg = PKGDICT[PKG_ID]

try:
# Free all unused memory
gc.collect()
af.device_gc()
mempool = cupy.get_default_memory_pool()
mempool.free_all_blocks()
except:
pass
except Exception as e:
print(e)

if PKG_ID == "afcpu":
af.set_backend(af.BackendType.cpu)
Expand Down Expand Up @@ -98,8 +101,7 @@ def initialize_package(PKG_ID):
print(cupy.cuda.Device())
mempool = cupy.get_default_memory_pool()
mempool.free_all_blocks()
elif PKG_ID == "cupynumeric":
pass
else:
raise NotImplementedError()

# Free all unused memory
gc.collect()
raise NotImplementedError()
27 changes: 27 additions & 0 deletions benchmarks/src/pytest_benchmark/test_blackscholes.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,29 @@ def cnd(x):

return (C, P)

def black_scholes_cupynumeric(S, X, R, V, T):
    """Evaluate the Black-Scholes expressions using cupynumeric.

    Args:
        S: Underlying stock price.
        X: Strike price.
        R: Risk-free rate of interest.
        V: Volatility.
        T: Time to maturity.

    Returns:
        tuple: ``(C, P)`` -- presumably the call and put option values
        (the names follow the usual Black-Scholes convention; confirm
        against the callers).
    """

    def erf(arr):
        # NOTE(review): this evaluates exp(-arr**2), which is not the true
        # error function. It is kept unchanged so the benchmark workload
        # stays the same.
        return cupynumeric.exp(-arr * arr)

    def cnd(x):
        # Boolean mask selecting the positive entries; used as a 0/1 weight.
        positive = x > 0
        upper = 0.5 + erf(x / sqrt2) / 2
        lower = 0.5 - erf((-x) / sqrt2) / 2
        return positive * upper + (1 - positive) * lower

    # sqrt(T) and exp(-R*T) are deliberately recomputed at each use so the
    # amount of work matches the original formulation.
    d1 = (cupynumeric.log(S / X) + (R + (V * V) * 0.5) * T) / (V * cupynumeric.sqrt(T))
    d2 = d1 - (V * cupynumeric.sqrt(T))

    cnd_d1 = cnd(d1)
    cnd_d2 = cnd(d2)

    call_value = S * cnd_d1 - (X * cupynumeric.exp((-R) * T) * cnd_d2)
    put_value = X * cupynumeric.exp((-R) * T) * (1 - cnd_d2) - (S * (1 - cnd_d1))

    return (call_value, put_value)

def black_scholes_arrayfire(S, X, R, V, T):
def cnd(x):
Expand Down Expand Up @@ -137,6 +160,9 @@ def generate_arrays(pkgid, count):
elif "numpy" == pkg:
for i in range(count):
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
elif "cupynumeric" == pkg:
for i in range(count):
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))

return arr_list

Expand All @@ -146,4 +172,5 @@ def generate_arrays(pkgid, count):
"numpy": black_scholes_numpy,
"cupy": black_scholes_cupy,
"arrayfire": black_scholes_arrayfire,
"cupynumeric": black_scholes_cupynumeric
}
5 changes: 4 additions & 1 deletion benchmarks/src/pytest_benchmark/test_elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def func_cupy(arr):
cupy.cuda.runtime.deviceSynchronize()
return x

GROUP_FUNCS = {"numpy": func, "cupy": func_cupy, "arrayfire": func_af, "dpnp": func}
GROUP_FUNCS = {"numpy": func, "cupy": func_cupy, "arrayfire": func_af, "dpnp": func, "cupynumeric": func}

benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
result = benchmark.pedantic(
Expand Down Expand Up @@ -312,5 +312,8 @@ def generate_arrays(pkgid, count):
elif "numpy" == pkg:
for i in range(count):
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
elif "cupynumeric" == pkg:
for i in range(count):
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))

return arr_list
7 changes: 6 additions & 1 deletion benchmarks/src/pytest_benchmark/test_fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ def generate_arrays(pkgid, count):
elif "numpy" == pkg:
for i in range(count):
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
elif "cupynumeric" == pkg:
for i in range(count):
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))

return arr_list

Expand Down Expand Up @@ -86,5 +89,7 @@ def fft_cupy(arr):
cupy.cuda.runtime.deviceSynchronize()
return res

def fft_cupynumeric(arr):
    """Apply ``cupynumeric.fft.fft`` to ``arr`` and return the result."""
    res = cupynumeric.fft.fft(arr)
    return res

FUNCS = {"dpnp": fft_dpnp, "numpy": fft_np, "cupy": fft_cupy, "arrayfire": fft_af}
FUNCS = {"dpnp": fft_dpnp, "numpy": fft_np, "cupy": fft_cupy, "arrayfire": fft_af, "cupynumeric": fft_cupynumeric}
7 changes: 6 additions & 1 deletion benchmarks/src/pytest_benchmark/test_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def generate_arrays(pkgid, count):
np.random.rand(1)
for i in range(count):
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
elif "cupynumeric" == pkg:
for i in range(count):
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))

return arr_list

Expand Down Expand Up @@ -117,5 +120,7 @@ def gemm_cupy(A, B, C):
cupy.cuda.runtime.deviceSynchronize()
return C

def gemm_cupynumeric(A, B, C):
    """Return ``alpha * cupynumeric.matmul(A, B) + beta * C``.

    ``alpha`` and ``beta`` are not parameters; they are looked up from the
    enclosing scope.
    """
    product = cupynumeric.matmul(A, B)
    return alpha * product + beta * C

FUNCS = {"numpy": gemm_np, "cupy": gemm_cupy, "arrayfire": gemm_af, "dpnp": gemm_dpnp}
FUNCS = {"numpy": gemm_np, "cupy": gemm_cupy, "arrayfire": gemm_af, "dpnp": gemm_dpnp, "cupynumeric": gemm_cupynumeric}
91 changes: 90 additions & 1 deletion benchmarks/src/pytest_benchmark/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ class TestKmeans:
def test_kmeans(self, benchmark, pkgid):
initialize_package(pkgid)
pkg = PKGDICT[pkgid]
kmean_class = {"dpnp": kmeans_dpnp, "numpy": kmeans_numpy, "cupy": kmeans_cupy, "arrayfire": kmeans_af}
kmean_class = {"dpnp": kmeans_dpnp, "numpy": kmeans_numpy, "cupy": kmeans_cupy, "arrayfire": kmeans_af,
"cupynumeric": kmeans_cupynumeric}
obj = kmean_class[pkg.__name__]()

benchmark.extra_info["description"] = f"{NSAMPLES}x{NFEATURES} over {K} centers"
Expand Down Expand Up @@ -189,6 +190,94 @@ def kmeans(self):
return centroids, cluster_assignments



class kmeans_cupynumeric:
    """K-Means clustering workload implemented with cupynumeric.

    The random data set and the indices of the initial centroids are drawn
    in the constructor; ``kmeans`` then runs the clustering loop on them.
    """

    def __init__(self):
        # Random (NSAMPLES, NFEATURES) data set.
        self.data = cupynumeric.random.random((NSAMPLES, NFEATURES))
        # K distinct row indices that seed the centroids.
        n_rows = self.data.shape[0]
        self.centroid_indices = cupynumeric.random.choice(n_rows, K, replace=False)

    def initialize_centroids(self):
        """
        Picks the initial centroids out of the stored data points.

        Returns:
            The rows of ``self.data`` selected by ``self.centroid_indices``.
        """
        seeds = self.data[self.centroid_indices, :]
        return seeds

    def assign_to_clusters(self, centroids):
        """
        Assigns each stored data point to its closest centroid.

        Args:
            centroids: The current centroids, one per row.

        Returns:
            For every data point, the index of the centroid with the
            smallest Euclidean distance.
        """
        # Insert singleton axes so every point is paired with every centroid.
        points = self.data[:, cupynumeric.newaxis, :]
        centers = centroids[cupynumeric.newaxis, :, :]
        squared_diff = (points - centers) ** 2
        distances = cupynumeric.sqrt(squared_diff.sum(axis=2))
        return cupynumeric.argmin(distances, axis=1)

    def update_centroids(self, cluster_assignments):
        """
        Recomputes each centroid as the mean of the points assigned to it.

        Args:
            cluster_assignments: Cluster index of every data point.

        Returns:
            Array with K rows and one column per data feature; the row of a
            cluster that received no points is left at zero.
        """
        n_features = self.data.shape[1]
        updated = cupynumeric.zeros((K, n_features))
        for cluster_id in range(K):
            members = self.data[cluster_assignments == cluster_id]
            if len(members) == 0:
                # Empty cluster: keep the zero row.
                continue
            updated[cluster_id] = cupynumeric.mean(members, axis=0)
        return updated

    def kmeans(self):
        """
        Runs the K-Means loop on the stored data.

        At most ITERATIONS rounds are performed; the loop stops early once
        the norm of the centroid change drops below TOLERANCE.

        Returns:
            tuple: A tuple containing:
                - The final centroids.
                - The cluster assignment of each data point from the last
                  round (``None`` if no round was executed).
        """
        centroids = self.initialize_centroids()
        cluster_assignments = None

        for _ in range(ITERATIONS):
            previous = cupynumeric.copy(centroids)

            # E-step: assign every point to its nearest centroid
            cluster_assignments = self.assign_to_clusters(centroids)

            # M-step: move each centroid to the mean of its points
            centroids = self.update_centroids(cluster_assignments)

            # Convergence check on the total centroid movement
            shift = cupynumeric.linalg.norm(centroids - previous)
            if shift < TOLERANCE:
                break

        return centroids, cluster_assignments


class kmeans_af:
def __init__(self):
self.data = af.Array(np.random.random((NSAMPLES, NFEATURES)).flatten().tolist(), shape=(NSAMPLES, NFEATURES))
Expand Down
Loading
Loading