Merged

30 commits
9054793
move prettytable into inc.common
xin3he Jun 24, 2024
fd510db
add benchmark
xin3he Jun 25, 2024
29f974c
support windows
xin3he Jun 26, 2024
dabe436
fix bug
xin3he Jun 26, 2024
4f7cb7c
enable subprocess running
xin3he Jun 26, 2024
4a3b6cd
fix bug in windows
xin3he Jun 26, 2024
3cc3885
enhance log
xin3he Jun 26, 2024
b3c1091
add document
xin3he Jun 26, 2024
ca1f3b6
update platform status
xin3he Jun 26, 2024
29ebf1a
add incbench dlrm example
xin3he Jun 27, 2024
5960bb7
add more docstring
xin3he Jun 27, 2024
4c15bda
add performance test for sq opt-125m
xin3he Jun 28, 2024
60340f2
enhance pre-commit for max-line-length check
xin3he Jun 28, 2024
5f02407
add Multiple Instance Benchmark Summary
xin3he Jun 28, 2024
cc014af
Dump Throughput and Latency Summary
xin3he Jun 28, 2024
c3de633
change log folder and add UTs
xin3he Jul 1, 2024
9757779
add requirement
xin3he Jul 1, 2024
6ca810f
Merge branch 'master' into xinhe/benchmark
xin3he Jul 2, 2024
8549e92
improve UT coverage
xin3he Jul 3, 2024
0f6e057
fix pylint
xin3he Jul 3, 2024
7f3aff5
remove previous useless code
xin3he Jul 8, 2024
eeb56f6
fix bug
xin3he Jul 8, 2024
24ec333
fix pylint
xin3he Jul 8, 2024
18ca594
fix bug
xin3he Jul 8, 2024
b55b22b
Merge branch 'master' into xinhe/benchmark
chensuyue Jul 9, 2024
a524d9c
update summary format per suyue's request
xin3he Jul 9, 2024
245c75a
fdsa
xin3he Jul 9, 2024
81687bd
revert pre-commit change
xin3he Jul 9, 2024
d681fc7
Merge branch 'master' into xinhe/benchmark
xin3he Jul 10, 2024
7e73d1a
update UT
xin3he Jul 10, 2024
add Multiple Instance Benchmark Summary
Signed-off-by: xin3he <[email protected]>
xin3he committed Jun 28, 2024
commit 5f02407e7004f73920edb7404e4b81ef060a99ee
@@ -75,13 +75,13 @@ function run_benchmark {

if [ "${topology}" = "opt_125m_ipex_sq" ]; then
model_name_or_path="facebook/opt-125m"
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
extra_cmd=$extra_cmd" --ipex"
elif [ "${topology}" = "llama2_7b_ipex_sq" ]; then
model_name_or_path="meta-llama/Llama-2-7b-hf"
extra_cmd=$extra_cmd" --ipex --sq --alpha 0.8"
extra_cmd=$extra_cmd" --ipex"
elif [ "${topology}" = "gpt_j_ipex_sq" ]; then
model_name_or_path="EleutherAI/gpt-j-6b"
extra_cmd=$extra_cmd" --ipex --sq --alpha 1.0"
extra_cmd=$extra_cmd" --ipex"
fi

if [[ ${mode} == "accuracy" ]]; then
@@ -96,9 +96,8 @@ function run_benchmark {
incbench --num_cores_per_instance 4 run_clm_no_trainer.py \
--model ${model_name_or_path} \
--approach ${approach} \
--output_dir ${tuned_checkpoint} \
--task ${task} \
--batch_size ${batch_size} \
--output_dir ${tuned_checkpoint} \
${extra_cmd} ${mode_cmd}
else
echo "Error: No such mode: ${mode}"
@@ -239,21 +239,21 @@ def run_fn(model):

if args.performance:
user_model.eval()
batch_size, input_leng = 1, 512
batch_size, input_leng = args.batch_size, 512
example_inputs = torch.ones((batch_size, input_leng), dtype=torch.long)
print("Batch size = {:d}".format(batch_size))
print("The length of input tokens = {:d}".format(input_leng))
import time

total_iters = 100
total_iters = args.iters
warmup_iters = 5
with torch.no_grad():
for i in range(total_iters):
if i == warmup_iters:
start = time.time()
user_model(example_inputs)
end = time.time()
latency = (end - start) / ((total_iters - warmup_iters))
throughput = ((total_iters - warmup_iters)) / (end - start)
latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
print("Latency: {:.3f} ms".format(latency * 10**3))
print("Throughput: {:.3f} samples/sec".format(throughput))
81 changes: 75 additions & 6 deletions neural_compressor/common/benchmark.py
@@ -268,29 +268,47 @@ def set_cores_for_instance(args, numa_info):
target_cores = args.num_instances * args.num_cores_per_instance
assert target_cores <= len(
available_cores_list
), "num_instances * num_cores_per_instance = {} exceeds the range of physical CPUs:{}".format(
), "num_instances * num_cores_per_instance = {} exceeds the range of physical CPUs:{}.".format(
target_cores, len(available_cores_list)
)
cores_list = list(range(target_cores))
# log for cores in use
logger.info("num_instances * num_cores_per_instance = {} cores are used.".format(target_cores))
else:
# default behavior, only use numa:0
cores_list = numa_info[0]
# log for cores in use
logger.info("By default, Intel Neural Compressor uses all cores on numa:0.")
else:
cores_list = parse_str2list(args.cores)
# log for cores available
logger.info("{} cores are available.".format(len(cores_list)))
if args.num_cores_per_instance and args.num_instances:
target_cores = args.num_instances * args.num_cores_per_instance
assert target_cores <= len(
cores_list
), "num_instances * num_cores_per_instance = {} exceeds the range of available CPUs:{}".format(
), "num_instances * num_cores_per_instance = {} exceeds the range of available CPUs:{}.".format(
target_cores, len(cores_list)
)
cores_list = cores_list[:target_cores]

# preprocess args.num_instances to set default values
if args.num_instances is None:
if args.num_cores_per_instance:
args.num_instances = len(cores_list) // args.num_cores_per_instance
else:
args.num_instances = 1
logger.info("By default, Intel Neural Compressor triggers only one instance.")

### log for instances number and cores in use
if args.num_instances == 1:
logger.info("1 instance is triggered.", highlight=True)
else:
logger.info("{} instances are triggered.".format(args.num_instances), highlight=True)
if len(cores_list) == 1:
logger.info("Only 1 core is in use.", highlight=True)
else:
logger.info("{} cores are in use.".format(len(cores_list)), highlight=True)

# only need to process num_cores_per_instance now
core_list_per_instance = {}
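To make the defaulting above concrete, here is a rough sketch of how the instance count and per-instance core groups could fall out. `split_cores` is a hypothetical helper, not the function in benchmark.py; the defaulting mirrors the hunk above, while the final slicing into groups is an assumption for illustration.

```python
def split_cores(cores_list, num_instances=None, num_cores_per_instance=None):
    """Sketch of the defaulting logic above; the slicing step is assumed."""
    if num_instances is None:
        if num_cores_per_instance:
            num_instances = len(cores_list) // num_cores_per_instance
        else:
            num_instances = 1  # default: a single instance
    if num_cores_per_instance is None:
        num_cores_per_instance = len(cores_list) // num_instances
    return {
        i: cores_list[i * num_cores_per_instance:(i + 1) * num_cores_per_instance]
        for i in range(num_instances)
    }

# e.g. 8 physical cores with 4 cores per instance -> 2 instances
print(split_cores(list(range(8)), num_cores_per_instance=4))
# {0: [0, 1, 2, 3], 1: [4, 5, 6, 7]}
```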
@@ -356,10 +374,12 @@ def run_multi_instance_command(args, core_list_per_instance, raw_cmd):
"""
instance_cmd = ""
if not os.getenv("PYTHON_PATH"): # pragma: no cover
logger.info("The interpreter path is not set, using `python` command.")
logger.info("The interpreter path is not set, using string `python` as command.")
logger.info("To replace it, use `export PYTHON_PATH=xxx`.")
interpreter = os.getenv("PYTHON_PATH", "python")
current_work_dir = os.getcwd()
logfile_process_map = {}
logfile_dict = {}
for i, core_list in core_list_per_instance.items():
# build cmd and log file path
prefix = generate_prefix(args, core_list)
@@ -373,6 +393,7 @@ def run_multi_instance_command(args, core_list_per_instance, raw_cmd):
) # nosec
# log_file_path: [process_object, instance_command, instance_index]
logfile_process_map[instance_log_file] = [p, instance_cmd, i + 1]
logfile_dict[i + 1] = instance_log_file

# Dump each instance's standard output to the corresponding log file
for instance_log_file, p_cmd_i in logfile_process_map.items():
@@ -384,12 +405,60 @@ def run_multi_instance_command(args, core_list_per_instance, raw_cmd):
logger.info(f"The log of instance {p_cmd_i[2]} is saved to {instance_log_file}")

p.communicate()
return logfile_dict


def summary_latency_throughput(logfile_dict):
"""Get the summary of the benchmark."""
throughput_pattern = r"[T,t]hroughput:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
latency_pattern = r"[L,l]atency:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"

latency_list = []
throughput_list = []
latency_unit_name = ""
throughput_unit_name = ""
for idx, logfile in logfile_dict.items():
with open(logfile, "r") as f:
for line in f:
re_latency = re.search(latency_pattern, line)
re_throughput = re.search(throughput_pattern, line)
if re_latency:
latency_list.append(float(re_latency.group(1)))
if not latency_unit_name:
latency_unit_name = re_latency.group(2)
if re_throughput:
throughput_list.append(float(re_throughput.group(1)))
if not throughput_unit_name:
throughput_unit_name = re_throughput.group(2)
if throughput_list and latency_list:
assert (
len(latency_list) == len(throughput_list) == len(logfile_dict)
), "Multiple instance benchmark failed with some instances!"

# dump collected latency and throughput info
header = "Multiple Instance Benchmark Summary"
field_names = [
"Instance",
"Latency ({})".format(latency_unit_name),
"Throughput ({})".format(throughput_unit_name),
]
output_data = []
for idx, (latency, throughput) in enumerate(zip(latency_list, throughput_list)):
output_data.append([idx + 1, round(latency, 3), round(throughput, 3)])
output_data.append(
[
format_list2str(logfile_dict.keys()),
round(sum(latency_list) / len(latency_list), 3),
round(sum(throughput_list), 3),
]
)
Statistics(output_data, header=header, field_names=field_names).print_stat()


def benchmark():
"""Benchmark API interface."""
logger.info("Start benchmark with Intel Neural Compressor.")
logger.info("By default, Intel Neural Compressor triggers only one instance on numa:0.")
logger.info("Intel Neural Compressor only uses physical CPUs for the best performance.")

parser = argparse.ArgumentParser(description=description, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("--num_instances", type=int, default=None, help="Determine the number of instances.")
@@ -409,7 +478,7 @@ def benchmark():
assert sys.platform in ["linux", "win32"], "only support platform windows and linux..."

numa_info = dump_numa_info() # show numa info and current usage of cores
logger.info("Intel Neural Compressor only uses physical CPUs for the best performance.")
core_list_per_instance = set_cores_for_instance(args, numa_info=numa_info)
script_and_parameters = args.script + " " + " ".join(args.parameters)
run_multi_instance_command(args, core_list_per_instance, raw_cmd=script_and_parameters)
logfile_dict = run_multi_instance_command(args, core_list_per_instance, raw_cmd=script_and_parameters)
summary_latency_throughput(logfile_dict)
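To see how the new summary step extracts numbers, here is a small, self-contained rerun of the regexes introduced in summary_latency_throughput on a made-up log excerpt; the log text below is illustrative only.

```python
import re

# Same patterns as in summary_latency_throughput above.
throughput_pattern = r"[T,t]hroughput:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"
latency_pattern = r"[L,l]atency:\s*([0-9]*\.?[0-9]+)\s*([a-zA-Z/]*)"

sample_log = """\
Latency: 12.345 ms
Throughput: 81.012 samples/sec
"""

for line in sample_log.splitlines():
    re_latency = re.search(latency_pattern, line)
    re_throughput = re.search(throughput_pattern, line)
    if re_latency:
        print("latency:", float(re_latency.group(1)), re_latency.group(2))
    if re_throughput:
        print("throughput:", float(re_throughput.group(1)), re_throughput.group(2))
```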
6 changes: 6 additions & 0 deletions neural_compressor/common/utils/logger.py
@@ -120,6 +120,12 @@ def fatal(msg, *args, **kwargs):
def info(msg, *args, **kwargs):
"""Output log with the info level."""
kwargs.setdefault("stacklevel", 2)
highlight = kwargs.pop("highlight", False)
if highlight:
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[91m"
msg = f"{BOLD}{RED}{msg}{RESET}"
if isinstance(msg, dict):
for _, line in enumerate(_pretty_dict(msg).split("\n")):
Logger().get_logger().info(line, *args, **kwargs)
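As a minimal sketch of the highlight path added to info() above: the message is wrapped in ANSI bold/red escape codes before being handed to the logger. The standalone `highlight` helper below is for illustration only.

```python
# ANSI escape codes, as used in the logger change above.
RESET = "\033[0m"
BOLD = "\033[1m"
RED = "\033[91m"

def highlight(msg: str) -> str:
    """Wrap a message in bold red ANSI codes."""
    return f"{BOLD}{RED}{msg}{RESET}"

# Renders bold red on ANSI-capable terminals; plain text elsewhere.
print(highlight("2 instances are triggered."))
```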