Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
9054793
move prettytable into inc.common
xin3he Jun 24, 2024
fd510db
add benchmark
xin3he Jun 25, 2024
29f974c
support windows
xin3he Jun 26, 2024
dabe436
fix bug
xin3he Jun 26, 2024
4f7cb7c
enable subprocess running
xin3he Jun 26, 2024
4a3b6cd
fix bug in windows
xin3he Jun 26, 2024
3cc3885
enhance log
xin3he Jun 26, 2024
b3c1091
add document
xin3he Jun 26, 2024
ca1f3b6
update platform status
xin3he Jun 26, 2024
29ebf1a
add incbench dlrm example
xin3he Jun 27, 2024
5960bb7
add more docstring
xin3he Jun 27, 2024
4c15bda
add performance test for sq opt-125m
xin3he Jun 28, 2024
60340f2
enhance pre-commit for max-line-length check
xin3he Jun 28, 2024
5f02407
add Multiple Instance Benchmark Summary
xin3he Jun 28, 2024
cc014af
Dump Throughput and Latency Summary
xin3he Jun 28, 2024
c3de633
change log folder and add UTs
xin3he Jul 1, 2024
9757779
add requirement
xin3he Jul 1, 2024
6ca810f
Merge branch 'master' into xinhe/benchmark
xin3he Jul 2, 2024
8549e92
improve UT coverage
xin3he Jul 3, 2024
0f6e057
fix pylint
xin3he Jul 3, 2024
7f3aff5
remove previous useless code
xin3he Jul 8, 2024
eeb56f6
fix bug
xin3he Jul 8, 2024
24ec333
fix pylint
xin3he Jul 8, 2024
18ca594
fix bug
xin3he Jul 8, 2024
b55b22b
Merge branch 'master' into xinhe/benchmark
chensuyue Jul 9, 2024
a524d9c
update summary format per suyue's request
xin3he Jul 9, 2024
245c75a
fdsa
xin3he Jul 9, 2024
81687bd
revert pre-commit change
xin3he Jul 9, 2024
d681fc7
Merge branch 'master' into xinhe/benchmark
xin3he Jul 10, 2024
7e73d1a
update UT
xin3he Jul 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
improve UT coverage
Signed-off-by: xin3he <[email protected]>
  • Loading branch information
xin3he committed Jul 3, 2024
commit 8549e924aa69ef0353e91d982d6abc874140567f
26 changes: 16 additions & 10 deletions neural_compressor/common/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_linux_numa_info():
}

# if numa_info is not collected, we go back to socket_info
if not numa_info:
if not numa_info: # pragma: no cover
for line in output.splitlines():
# demo: "Socket(s): 2"
socket_match = re.match(r"^Socket\(s\):\s+(.*)$", line)
Expand Down Expand Up @@ -117,6 +117,7 @@ def get_windows_numa_info():
}
"""
# pylint: disable=import-error
# pragma: no cover
import wmi

c = wmi.WMI()
Expand Down Expand Up @@ -157,11 +158,11 @@ def dump_numa_info():
Returns:
numa_info (dict): {numa_node_index: list of Physical CPUs in this numa node, ...}
"""
if psutil.WINDOWS:
if psutil.WINDOWS: # pragma: no cover
numa_info = get_windows_numa_info()
elif psutil.LINUX:
numa_info = get_linux_numa_info()
else:
else: # pragma: no cover
logger.error(f"Unsupported platform detected: {sys.platform}, only supported on Linux and Windows")

# dump stats to shell
Expand All @@ -187,19 +188,19 @@ def parse_str2list(cpu_ranges):
try:
start, end = r.split("-")
cpus.extend(range(int(start), int(end) + 1))
except ValueError:
except ValueError: # pragma: no cover
raise ValueError(f"Invalid range: {r}")
else:
try:
cpus.append(int(r))
except ValueError:
except ValueError: # pragma: no cover
raise ValueError(f"Invalid number: {r}")
return cpus


def format_list2str(cpus):
"""Format [0,1,2,3,4,7,8] back to '0-4,7,8' for human readable."""
if not cpus:
if not cpus: # pragma: no cover
return ""
cpus = sorted(set(cpus))
ranges = []
Expand Down Expand Up @@ -268,7 +269,7 @@ def set_cores_for_instance(args, numa_info):
target_cores = args.num_instances * args.num_cores_per_instance
assert target_cores <= len(
available_cores_list
), "num_instances * num_cores_per_instance = {} exceeds the range of physical CPUs:{}.".format(
), "Invalid configuration: num_instances * num_cores_per_instance = {} exceeds the range of physical CPUs:{}.".format(
target_cores, len(available_cores_list)
)
cores_list = list(range(target_cores))
Expand All @@ -287,14 +288,19 @@ def set_cores_for_instance(args, numa_info):
target_cores = args.num_instances * args.num_cores_per_instance
assert target_cores <= len(
cores_list
), "num_instances * num_cores_per_instance = {} exceeds the range of available CPUs:{}.".format(
), "Invalid configuration: num_instances * num_cores_per_instance = {} exceeds the range of available CPUs:{}.".format(
target_cores, len(cores_list)
)
cores_list = cores_list[:target_cores]

# preprocess args.num_instances to set default values
if args.num_instances is None:
if args.num_cores_per_instance:
assert args.num_cores_per_instance <= len(
cores_list
), "Invalid configuration: num_cores_per_instance = {} exceeds the number of available CPUs = {}.".format(
args.num_cores_per_instance, len(cores_list)
)
args.num_instances = len(cores_list) // args.num_cores_per_instance
target_cores = args.num_instances * args.num_cores_per_instance
cores_list = cores_list[:target_cores]
Expand All @@ -308,7 +314,7 @@ def set_cores_for_instance(args, numa_info):
else:
logger.info("{} instances are triggered.".format(args.num_instances), highlight=True)
if len(cores_list) == 1:
logger.info("Only 1 core is in use.", highlight=True)
logger.info("1 core is in use.", highlight=True)
else:
logger.info("{} cores are in use.".format(len(cores_list)), highlight=True)

Expand Down Expand Up @@ -362,7 +368,7 @@ def generate_prefix(args, core_list):

hex_core = hex(reduce(lambda x, y: x | y, [1 << p for p in parse_str2list(core_list[1])]))
return "start /B /WAIT /node {} /affinity {}".format(socket_id, hex_core)
else:
else: # pragma: no cover
return ""


Expand Down
83 changes: 67 additions & 16 deletions test/3x/common/test_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,43 @@
import os
import re
import shutil
import subprocess

from neural_compressor.common.utils import DEFAULT_WORKSPACE

# build files during test process to test benchmark
# Maps each temporary script path to the source text that build_tmp_file() writes there.
tmp_file_dict = {}
# Minimal script: its only statement prints the marker line "test benchmark".
tmp = """
# build file:tmp.py during test process to test benchmark
print("test benchmark")
"""
# NOTE(review): this page is a rendered diff; `tmp_path = "./tmp.py"` looks like the
# pre-change single-file path shown next to its replacement below — confirm against the real file.
tmp_path = "./tmp.py"
tmp_file_dict["./tmp/tmp.py"] = tmp

# Script that prints the marker plus one "Throughput: ..." line and one "Latency: ..." line.
tmp = """
print("test benchmark")
print("Throughput: 1 samples/sec")
print("Latency: 1000 ms")
"""
tmp_file_dict["./tmp/throughput_latency.py"] = tmp

# Script that prints the marker plus a "Throughput: ..." line only.
tmp = """
print("test benchmark")
print("Throughput: 2 tokens/sec")
"""
tmp_file_dict["./tmp/throughput.py"] = tmp

# Script that prints the marker plus a "Latency: ..." line only.
tmp = """
print("test benchmark")
print("Latency: 10 ms")
"""
tmp_file_dict["./tmp/latency.py"] = tmp


# Create the ./tmp directory and write every script in tmp_file_dict to its path.
# NOTE(review): this span is a rendered diff with indentation stripped. The first
# open/write/close trio appears to be the removed single-file version and the
# makedirs + loop the added one — confirm against the real file before editing.
# os.makedirs() is called without exist_ok=True, so it raises FileExistsError when
# ./tmp is already present (e.g. left over from an interrupted run).
def build_tmp_file():
f = open(tmp_path, "w")
f.write(tmp)
f.close()
os.makedirs("./tmp")
for tmp_path, tmp in tmp_file_dict.items():
f = open(tmp_path, "w")
f.write(tmp)
f.close()


def trigger_process(cmd):
Expand All @@ -37,7 +60,7 @@ def check_main_process(message):


def check_log_file(log_file_path):
output_pattern = r"(.*)test benchmark"
output_pattern = r"(.*)test benchmark(.*)"
with open(log_file_path, "r") as f:
output = f.read()
f.close()
Expand All @@ -49,34 +72,35 @@ def setup_class(self):
build_tmp_file()

# Delete the generated scripts under ./tmp and the "nc_workspace" directory.
# Presumably invoked by pytest once after all tests of the class have run — confirm.
# NOTE(review): rendered diff; `os.remove(tmp_path)` appears to be the removed
# pre-change line and `shutil.rmtree("./tmp")` its replacement — confirm.
def teardown_class(self):
os.remove(tmp_path)
shutil.rmtree("./tmp")
shutil.rmtree("nc_workspace")

# Run incbench with no options on the minimal script and check that exactly one
# instance is reported and that its log file passes check_log_file().
# NOTE(review): rendered diff; the two `cmd` lines are the pre-change ("tmp.py") and
# post-change ("tmp/tmp.py") versions of the same statement — only the second takes effect.
def test_default(self):
cmd = "incbench tmp.py"
cmd = "incbench tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
assert num_i == 1, "the number of instance should be 1."
assert check_log_file(log_file_path), "instance output is not correct."

# Run incbench with only `--num_i 2` and check that two instances are reported
# and that the log file passes check_log_file().
# NOTE(review): rendered diff; the two `cmd` lines are the pre-change and post-change
# versions of the same statement — only the second takes effect.
def test_only_num_i(self):
cmd = "incbench --num_i 2 tmp.py"
cmd = "incbench --num_i 2 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
assert num_i == 2, "the number of instance should be 2."
assert check_log_file(log_file_path), "instance output is not correct."

# Run incbench with only `--num_c 1` (one core per instance) and check that the
# reported instance count equals the reported core count.
# NOTE(review): rendered diff; the two `cmd` lines are the pre-change and post-change
# versions of the same statement — only the second takes effect.
def test_only_num_c(self):
cmd = "incbench --num_c 1 tmp.py"
cmd = "incbench --num_c 1 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
assert num_i == all_c, "the number of instance should equal the number of available cores."
assert check_log_file(log_file_path), "instance output is not correct."

def test_only_cores(self):
cmd = "incbench -C 0-1 tmp.py"
cmd = "incbench -C 0-1 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
Expand All @@ -85,7 +109,7 @@ def test_only_cores(self):
assert check_log_file(log_file_path), "instance output is not correct."

def test_num_i_num_c(self):
cmd = "incbench --num_i 2 --num_c 2 tmp.py"
cmd = "incbench --num_i 2 --num_c 2 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
Expand All @@ -94,7 +118,7 @@ def test_num_i_num_c(self):
assert check_log_file(log_file_path), "instance output is not correct."

def test_num_i_cores(self):
cmd = "incbench --num_i 2 -C 1-5 tmp.py"
cmd = "incbench --num_i 2 -C 0-2,5,8 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
Expand All @@ -103,16 +127,43 @@ def test_num_i_cores(self):
assert check_log_file(log_file_path), "instance output is not correct."

# Run incbench with `--num_c 2 -C 0-6` (seven cores listed, two per instance) and check
# that three instances using six cores in total are reported, i.e. the odd core is dropped.
# NOTE(review): rendered diff; the two `cmd` lines are the pre-change and post-change
# versions of the same statement — only the second takes effect.
def test_num_c_cores(self):
cmd = "incbench --num_c 2 -C 0-6 tmp.py"
cmd = "incbench --num_c 2 -C 0-6 tmp/tmp.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
assert num_i == 3, "the number of instance should be all_c//num_c=3."
assert all_c == 6, "the number of available cores should be (all_c//num_c)*num_c=6."
assert check_log_file(log_file_path), "instance output is not correct."

def test_num_i_num_c_cores(self):
cmd = "incbench --num_i 2 --num_c 2 -C 0-7 tmp.py"
def test_cross_memory(self):
    """Run incbench on core 0 with --cross_memory and expect one single-core instance."""
    proc = trigger_process("incbench --num_c 1 -C 0 --cross_memory tmp/tmp.py")
    raw_stdout = proc.communicate()[0]
    # check_main_process yields (instance count, core count, log file path).
    num_i, all_c, log_file_path = check_main_process(raw_stdout.decode())
    assert num_i == 1, "the number of instance should be all_c//num_c=1."
    assert all_c == 1, "the number of available cores should be 1."
    assert check_log_file(log_file_path), "instance output is not correct."

def test_throughput_latency(self):
    """Run two 2-core instances on tmp/throughput_latency.py and verify the reported layout."""
    proc = trigger_process("incbench --num_i 2 --num_c 2 -C 0-7 tmp/throughput_latency.py")
    raw_stdout = proc.communicate()[0]
    # check_main_process yields (instance count, core count, log file path).
    num_i, all_c, log_file_path = check_main_process(raw_stdout.decode())
    assert num_i == 2, "the number of instance should be 2."
    assert all_c == 4, "the number of available cores should be num_i*num_c=4."
    assert check_log_file(log_file_path), "instance output is not correct."

def test_throughput(self):
    """Run two 2-core instances on tmp/throughput.py and verify the reported layout."""
    proc = trigger_process("incbench --num_i 2 --num_c 2 -C 0-7 tmp/throughput.py")
    raw_stdout = proc.communicate()[0]
    # check_main_process yields (instance count, core count, log file path).
    num_i, all_c, log_file_path = check_main_process(raw_stdout.decode())
    assert num_i == 2, "the number of instance should be 2."
    assert all_c == 4, "the number of available cores should be num_i*num_c=4."
    assert check_log_file(log_file_path), "instance output is not correct."

def test_latency(self):
cmd = "incbench --num_i 2 --num_c 2 -C 0-7 tmp/latency.py"
p = trigger_process(cmd)
stdout, _ = p.communicate()
num_i, all_c, log_file_path = check_main_process(stdout.decode())
Expand Down