Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
net_buffer_tuner: increase netdev_budget if we see time_squeeze events
time_squeeze for a softnet poll means we ran out of time specified
in netdev_budget_usecs; if we see increases in time_squeeze, increase
netdev_budget

Signed-off-by: Alan Maguire <[email protected]>
  • Loading branch information
alan-maguire committed Dec 3, 2024
commit 2491fc315db5c8c405bf76c2410391dc64ba571a
70 changes: 52 additions & 18 deletions src/net_buffer_tuner.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,17 @@ __u64 drop_interval_start = 0;
__u64 flow_limit_cpu_bitmap = 0;

int netdev_max_backlog = 0;
int netdev_budget = 0;

#ifdef BPFTUNE_LEGACY
SEC("kretprobe/enqueue_to_backlog")
int BPF_KRETPROBE(bpftune_enqueue_to_backlog, int ret)
#else

BPF_MAP_DEF(time_squeeze_map, BPF_MAP_TYPE_PERCPU_ARRAY, unsigned int, unsigned int, 1, 0);

extern const struct softnet_data softnet_data __ksym;

SEC("fexit/enqueue_to_backlog")
int BPF_PROG(bpftune_enqueue_to_backlog, struct sk_buff *skb, int cpu,
unsigned int *qtail, int ret)
Expand Down Expand Up @@ -64,28 +70,56 @@ int BPF_PROG(bpftune_enqueue_to_backlog, struct sk_buff *skb, int cpu,
drop_count = 1;
drop_interval_start = time;
}
if (drop_count < (max_backlog >> 4))
return 0;

old[0] = max_backlog;
new[0] = BPFTUNE_GROW_BY_DELTA(max_backlog);
send_net_sysctl_event(NULL, NETDEV_MAX_BACKLOG_INCREASE,
NETDEV_MAX_BACKLOG, old, new, &event);
if (drop_count >= (max_backlog >> 4)) {
old[0] = max_backlog;
new[0] = BPFTUNE_GROW_BY_DELTA(max_backlog);
send_net_sysctl_event(NULL, NETDEV_MAX_BACKLOG_INCREASE,
NETDEV_MAX_BACKLOG, old, new, &event);

#ifdef BPFTUNE_LEGACY
int cpu = bpf_get_smp_processor_id();
int cpu = bpf_get_smp_processor_id();
#endif
/* ensure flow limits prioritize small flows on this cpu */
if (cpu < 64) {
cpubit = 1 << cpu;
if (!(flow_limit_cpu_bitmap & cpubit)) {
old[0] = flow_limit_cpu_bitmap;
new[0] = flow_limit_cpu_bitmap |= cpubit;
if (!send_net_sysctl_event(NULL, FLOW_LIMIT_CPU_SET,
FLOW_LIMIT_CPU_BITMAP,
old, new, &event))
flow_limit_cpu_bitmap = new[0];
/* ensure flow limits prioritize small flows on this cpu */
if (cpu < 64) {
cpubit = 1 << cpu;
if (!(flow_limit_cpu_bitmap & cpubit)) {
old[0] = flow_limit_cpu_bitmap;
new[0] = flow_limit_cpu_bitmap |= cpubit;
if (!send_net_sysctl_event(NULL, FLOW_LIMIT_CPU_SET,
FLOW_LIMIT_CPU_BITMAP,
old, new, &event))
flow_limit_cpu_bitmap = new[0];
}
}
}
return 0;
}

#ifndef BPFTUNE_LEGACY
/* Runs on exit from net_rx_action(): if this CPU's softnet_data
 * time_squeeze counter has grown beyond the value last recorded for this
 * CPU, send an event proposing a larger netdev_budget.
 *
 * The section name must match the traced kernel function; attaching to
 * enqueue_to_backlog() here would sample time_squeeze from the wrong
 * context and never observe softirq poll exits directly.
 *
 * Always returns 0.
 */
SEC("fexit/net_rx_action")
int BPF_PROG(net_rx_action)
{
	struct bpftune_event event = { 0 };
	/* Only element 0 is populated below; zero the remainder so no
	 * uninitialized stack is handed to send_net_sysctl_event().
	 */
	long old[3] = { 0 }, new[3] = { 0 };
	struct softnet_data *sd;
	unsigned int time_squeeze, *last_time_squeeze;
	unsigned int zero = 0;	/* sole key of the single-entry per-cpu map */

	sd = (struct softnet_data *)bpf_this_cpu_ptr(&softnet_data);

	time_squeeze = BPFTUNE_CORE_READ(sd, time_squeeze);
	if (!time_squeeze)
		return 0;
	last_time_squeeze = bpf_map_lookup_elem(&time_squeeze_map, &zero);
	if (!last_time_squeeze)
		return 0;
	/* NOTE(review): the stored value presumably starts at 0, so the
	 * first non-zero counter seen after load is treated as an increase
	 * even if the squeezes predate the tuner - confirm this is intended.
	 */
	if (time_squeeze <= *last_time_squeeze)
		return 0;
	/* remember the new high-water mark for this CPU */
	*last_time_squeeze = time_squeeze;
	old[0] = netdev_budget;
	new[0] = BPFTUNE_GROW_BY_DELTA(netdev_budget);
	send_net_sysctl_event(NULL, NETDEV_BUDGET_INCREASE,
			      NETDEV_BUDGET, old, new, &event);
	return 0;
}
#endif
48 changes: 36 additions & 12 deletions src/net_buffer_tuner.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,29 @@ static struct bpftunable_desc descs[] = {
{ FLOW_LIMIT_CPU_BITMAP,
BPFTUNABLE_SYSCTL, "net.core.flow_limit_cpu_bitmap",
0, 1 },
{ NETDEV_BUDGET, BPFTUNABLE_SYSCTL, "net.core.netdev_budget",
0, 1 },
};

static struct bpftunable_scenario scenarios[] = {
{ NETDEV_MAX_BACKLOG_INCREASE, "need to increase max backlog size",
"Need to increase backlog size to prevent drops for faster connection" },
{ FLOW_LIMIT_CPU_SET, "need to set per-cpu bitmap value",
"Need to set flow limit per-cpu to prioritize small flows" }
"Need to set flow limit per-cpu to prioritize small flows" },
{ NETDEV_BUDGET_INCREASE, "need to increase # of packets processed per NAPI poll",
"Need to increase number of packets processed across network devices during NAPI poll to use all of net.core.netdev_budget_usecs" }
};

int init(struct bpftuner *tuner)
{
long cpu_bitmap = 0;
long max_backlog = 0;
long budget = 0;
int err;

bpftune_sysctl_read(0, "net.core.flow_limit_cpu_bitmap", &cpu_bitmap);
bpftune_sysctl_read(0, "net.core.netdev_max_backlog", &max_backlog);
bpftune_sysctl_read(0, "net.core.netdev_budget", &budget);
err = bpftuner_bpf_open(net_buffer, tuner);
if (err)
return err;
Expand All @@ -44,6 +50,8 @@ int init(struct bpftuner *tuner)
cpu_bitmap);
bpftuner_bpf_var_set(net_buffer, tuner, netdev_max_backlog,
max_backlog);
bpftuner_bpf_var_set(net_buffer, tuner, netdev_budget,
budget);
err = bpftuner_bpf_attach(net_buffer, tuner);
if (err)
return err;
Expand All @@ -64,7 +72,7 @@ void event_handler(struct bpftuner *tuner,
{
int scenario = event->scenario_id;
const char *tunable;
int id;
int id, ret;

/* netns cookie not supported; ignore */
if (event->netns_cookie == (unsigned long)-1)
Expand All @@ -73,21 +81,23 @@ void event_handler(struct bpftuner *tuner,
id = event->update[0].id;
tunable = bpftuner_tunable_name(tuner, id);
if (!tunable) {
bpftune_log(LOG_DEBUG, "unknown tunable [%d] for tcp_buffer_tuner\n", id);
bpftune_log(LOG_DEBUG, "unknown tunable [%d] for net_buffer_tuner\n", id);
return;
}
switch (id) {
case NETDEV_MAX_BACKLOG:
bpftuner_tunable_sysctl_write(tuner, id, scenario,
event->netns_cookie, 1,
(long int *)event->update[0].new,
ret = bpftuner_tunable_sysctl_write(tuner, id, scenario,
event->netns_cookie, 1,
(long int *)event->update[0].new,
"Due to excessive drops, change %s from (%ld) -> (%ld)\n",
tunable,
event->update[0].old[0],
tunable,
event->update[0].old[0],
event->update[0].new[0]);
if (!ret) {
/* update value of netdev_max_backlog for BPF program */
bpftuner_bpf_var_set(net_buffer, tuner, netdev_max_backlog,
event->update[0].new[0]);
/* update value of netdev_max_backlog for BPF program */
bpftuner_bpf_var_set(net_buffer, tuner, netdev_max_backlog,
event->update[0].new[0]);
}
break;
case FLOW_LIMIT_CPU_BITMAP:
bpftuner_tunable_sysctl_write(tuner, id, scenario,
Expand All @@ -97,6 +107,20 @@ void event_handler(struct bpftuner *tuner,
tunable,
event->update[0].old[0],
event->update[0].new[0]);

break;
case NETDEV_BUDGET:
ret = bpftuner_tunable_sysctl_write(tuner, id, scenario,
event->netns_cookie, 1,
(long int *)event->update[0].new,
"To maximize # packets processed per NAPI cycle, change %s from (%ld) -> (%ld)\n",
tunable,
event->update[0].old[0],
event->update[0].new[0]);
if (!ret) {
/* update value of netdev_budget for BPF program */
bpftuner_bpf_var_set(net_buffer, tuner, netdev_budget,
event->update[0].new[0]);
}
break;
}
}
2 changes: 2 additions & 0 deletions src/net_buffer_tuner.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
/* Identifiers for the tunables managed by the net buffer tuner.  Values
 * are consecutive starting at 0.
 */
enum net_buffer_tunables {
	/* maximum backlog size */
	NETDEV_MAX_BACKLOG = 0,
	/* per-cpu flow limit bitmap */
	FLOW_LIMIT_CPU_BITMAP,
	/* netdev budget */
	NETDEV_BUDGET,
	/* count of tunables above; keep this entry last */
	NET_BUFFER_NUM_TUNABLES,
};

/* Scenario identifiers reported by the net buffer tuner.  Values are
 * consecutive starting at 0.
 */
enum net_buffer_scenarios {
	/* the maximum backlog size needs to grow */
	NETDEV_MAX_BACKLOG_INCREASE = 0,
	/* a bit needs to be set in the flow limit cpu bitmap */
	FLOW_LIMIT_CPU_SET,
	/* the netdev budget needs to grow */
	NETDEV_BUDGET_INCREASE,
};
1 change: 1 addition & 0 deletions test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ TUNER_TESTS = support_test log_test service_test inotify_test cap_test \
podman_globalonly_test podman_globalonly_legacy_test \
sysctl_test sysctl_legacy_test sysctl_netns_test \
netns_test netns_legacy_test \
budget_test \
backlog_test backlog_legacy_test \
frag_test frag_legacy_test \
neigh_table_test neigh_table_v4only_test \
Expand Down
92 changes: 92 additions & 0 deletions test/budget_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/bash
#
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# Copyright (c) 2023, Oracle and/or its affiliates.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public
# License v2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#

# Run an iperf3 test with a deliberately low net.core.netdev_budget and
# ensure the tuner increases it.  A baseline run (no tuner) is followed by
# a test run with bpftune active; the test passes if netdev_budget ends up
# higher than the value it was forced to before the runs.

PORT=5201

. ./test_lib.sh

SLEEPTIME=1
TIMEOUT=30
MAX_CONN=50

for FAMILY in ipv4 ipv6 ; do

for CLIENT_OPTS in "" ; do
	case $FAMILY in
	ipv4)
		ADDR=$VETH2_IPV4
		;;
	ipv6)
		ADDR=$VETH2_IPV6
		;;
	esac

	test_start "$0|budget test to $ADDR:$PORT $FAMILY opts $CLIENT_OPTS $LATENCY"

	# save the original netdev_budget (not netdev_max_backlog) so the
	# restore below puts back the value that was actually changed.
	budget_orig=($(sysctl -n net.core.netdev_budget))
	test_setup true

	# force a tiny budget so polls are squeezed under load
	sysctl -w net.core.netdev_budget=5
	budget_pre=($(sysctl -n net.core.netdev_budget))
	declare -A results
	for MODE in baseline test ; do

		echo "Running ${MODE}..."
		test_run_cmd_local "$IPERF3 -s -p $PORT &"
		if [[ $MODE != "baseline" ]]; then
			test_run_cmd_local "$BPFTUNE -s &" true
			sleep $SETUPTIME
		else
			LOGSZ=$(wc -l $LOGFILE | awk '{print $1}')
			LOGSZ=$(expr $LOGSZ + 1)
		fi
		# -p selects the server port; -c names the server to connect to
		test_run_cmd_local "ip netns exec $NETNS $IPERF3 -fm -t 20 $CLIENT_OPTS -p $PORT -c $ADDR" true
		sleep $SLEEPTIME

		sresults=$(grep -E "sender" ${CMDLOG} | awk '{print $7}')
		rresults=$(grep -E "receiver" ${CMDLOG} | awk '{print $7}')
		units=$(grep -E "sender|receiver" ${CMDLOG} | awk '{print $8}' |head -1)

		if [[ $MODE == "baseline" ]]; then
			read -r -a sbaseline_results <<< $sresults
			read -r -a rbaseline_results <<< $rresults
			echo "" > ${CMDLOG}
		else
			read -r -a stest_results <<< $sresults
			read -r -a rtest_results <<< $rresults

		fi
		sleep $SLEEPTIME
	done

	budget_post=($(sysctl -n net.core.netdev_budget))
	# restore the system's original budget before judging the result
	sysctl -w net.core.netdev_budget="$budget_orig"
	echo "budget ${budget_pre} -> ${budget_post}"
	if [[ "$budget_post" -gt "$budget_pre" ]]; then
		test_pass
	fi
	test_cleanup
done
done

test_exit