diff --git a/docs/bpftune.rst b/docs/bpftune.rst index 805b16f..3e5578d 100644 --- a/docs/bpftune.rst +++ b/docs/bpftune.rst @@ -16,6 +16,7 @@ SYNOPSIS | { [**-s** | **--stderr** } | { [**-c** | **--cgroup**] cgroup} | { [**-l** | **--libdir** ] libdir} | [{ **-d** | **--debug** }] } { [**-r** | **--learning_rate** ] learning_rate} + { [**-R** | **--rollback** ]} { [**-S** | **--support** ]} DESCRIPTION @@ -75,5 +76,12 @@ OPTIONS the limit is increased by 25%. Default learning rate is 4. Lower values are more conservative as they change only when closer to limits, but may require more frequent changes as - a result + a result. + -R, --rollback + + Roll back sysctl settings on exit; this allows us to explore + tunable updates bpftune makes without making long-term changes + to the system. On exit, bpftune summarizes the changes made + and rolls back to the sysctl values that were set prior to + bpftune running. diff --git a/include/bpftune/bpftune.h b/include/bpftune/bpftune.h index 427322e..bc2de6c 100644 --- a/include/bpftune/bpftune.h +++ b/include/bpftune/bpftune.h @@ -167,7 +167,7 @@ struct bpftuner { const char *name; struct bpf_object_skeleton *skeleton; enum bpftune_support_level bpf_support; - bool bpf_legacy; + bool rollback; void *skel; void *obj; int (*init)(struct bpftuner *tuner); diff --git a/include/bpftune/libbpftune.h b/include/bpftune/libbpftune.h index e42acda..17bb4ad 100644 --- a/include/bpftune/libbpftune.h +++ b/include/bpftune/libbpftune.h @@ -299,4 +299,6 @@ int bpftuner_strategies_add(struct bpftuner *tuner, struct bpftuner_strategy **s bool bpftuner_bpf_prog_in_strategy(struct bpftuner *tuner, const char *prog); void bpftuner_bpf_set_autoload(struct bpftuner *tuner); +void bpftuner_rollback_set(struct bpftuner *tuner); + #endif /* __LIBBPFTUNE_H */ diff --git a/src/bpftune.c b/src/bpftune.c index 3d492cd..50f141b 100644 --- a/src/bpftune.c +++ b/src/bpftune.c @@ -54,6 +54,7 @@ int ringbuf_map_fd; void *ring_buffer; bool use_stderr; +bool 
rollback; char *allowlist[BPFTUNE_MAX_TUNERS]; int nr_allowlist; @@ -200,6 +201,8 @@ int init(const char *library_dir) /* individual tuner failure shouldn't prevent progress */ if (!tuner) continue; + if (rollback) + bpftuner_rollback_set(tuner); if (ringbuf_map_fd == 0) ringbuf_map_fd = bpftuner_ring_buffer_map_fd(tuner); } @@ -233,6 +236,7 @@ void do_help(void) " { -h|--help}}\n" " { -l|--library_path library_path}\n" " { -r|--learning_rate learning_rate}\n" + " { -R|--rollback}\n" " { -s|--stderr}\n" " { -S|--suppport}\n" " { -V|--version}}\n", @@ -284,6 +288,7 @@ int main(int argc, char *argv[]) { "help", no_argument, NULL, 'h' }, { "libdir", required_argument, NULL, 'l' }, { "learning_rate", required_argument, NULL, 'r' }, + { "rollback", no_argument, NULL, 'R' }, { "stderr", no_argument, NULL, 's' }, { "support", no_argument, NULL, 'S' }, { "version", no_argument, NULL, 'V' }, @@ -302,7 +307,7 @@ int main(int argc, char *argv[]) bin_name = argv[0]; - while ((opt = getopt_long(argc, argv, "a:c:dDhl:Lr:sSV", options, NULL)) + while ((opt = getopt_long(argc, argv, "a:c:dDhl:Lr:RsSV", options, NULL)) >= 0) { switch (opt) { case 'a': @@ -338,6 +343,9 @@ int main(int argc, char *argv[]) return 1; } break; + case 'R': + rollback = true; + break; case 's': use_stderr = true; break; diff --git a/src/libbpftune.c b/src/libbpftune.c index 9585509..f701b73 100644 --- a/src/libbpftune.c +++ b/src/libbpftune.c @@ -1055,6 +1055,18 @@ static void bpftuner_scenario_log(struct bpftuner *tuner, unsigned int tunable, } bpftune_log(BPFTUNE_LOG_LEVEL, "sysctl '%s' changed from (%s) -> (%s)\n", t->desc.name, oldvals, newvals); + + if (tuner->rollback && global_ns) { + bpftuner_tunable_sysctl_write(tuner, + tunable, + scenario, + 0, + t->desc.num_values, + t->initial_values, + "Rolling back sysctl values for '%s' from (%s) to original values (%s)...\n", + t->desc.name, + newvals, oldvals); + } } } else { bpftune_log(BPFTUNE_LOG_LEVEL, "Scenario '%s' occurred for tunable '%s' in 
%sglobal ns. %s\n", @@ -1677,3 +1689,8 @@ void bpftuner_bpf_set_autoload(struct bpftuner *tuner) } } } + +void bpftuner_rollback_set(struct bpftuner *tuner) +{ + tuner->rollback = true; +} diff --git a/src/libbpftune.map b/src/libbpftune.map index dd9441f..ecbc582 100644 --- a/src/libbpftune.map +++ b/src/libbpftune.map @@ -42,6 +42,7 @@ LIBBPFTUNE_0.1.1 { bpftuner_ring_buffer_map_fd; bpftuner_strategy_set; bpftuner_strategies_add; + bpftuner_rollback_set; bpftune_ring_buffer_init; bpftune_ring_buffer_poll; bpftune_ring_buffer_fini; diff --git a/test/Makefile b/test/Makefile index 2c85b4f..176baa4 100644 --- a/test/Makefile +++ b/test/Makefile @@ -23,6 +23,7 @@ PERF_TESTS = iperf3_test qperf_test TUNER_TESTS = support_test log_test service_test inotify_test cap_test \ sample_test sample_legacy_test \ strategy_test strategy_legacy_test \ + rollback_test rollback_legacy_test \ sysctl_test sysctl_legacy_test sysctl_netns_test \ netns_test netns_legacy_test \ backlog_test backlog_legacy_test \ diff --git a/test/rollback_legacy_test.sh b/test/rollback_legacy_test.sh new file mode 100644 index 0000000..776026a --- /dev/null +++ b/test/rollback_legacy_test.sh @@ -0,0 +1,124 @@ +#!/usr/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# Copyright (c) 2023, Oracle and/or its affiliates. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public +# License v2 as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. 
+# + +# run iperf3 test with low wmem max, ensure tuner increases it but it is +# rolled back on exit. + +PORT=5201 + +. ./test_lib.sh + +SLEEPTIME=1 +TIMEOUT=30 + +for FAMILY in ipv4 ipv6 ; do + + for CLIENT_OPTS in "" ; do + case $FAMILY in + ipv4) + ADDR=$VETH1_IPV4 + ;; + ipv6) + ADDR=$VETH1_IPV6 + ;; + esac + + test_start "$0|rollback legacy test to $ADDR:$PORT $FAMILY opts $CLIENT_OPTS $LATENCY" + + wmem_orig=($(sysctl -n net.ipv4.tcp_wmem)) + + test_setup true + + sysctl -w net.ipv4.tcp_wmem="${wmem_orig[0]} ${wmem_orig[1]} ${wmem_orig[1]}" + + declare -A results + for MODE in baseline test ; do + + echo "Running ${MODE}..." + test_run_cmd_local "ip netns exec $NETNS $IPERF3 -s -p $PORT -1 &" + if [[ $MODE != "baseline" ]]; then + test_run_cmd_local "$BPFTUNE -LR &" + sleep $SETUPTIME + else + LOGSZ=$(wc -l $LOGFILE | awk '{print $1}') + LOGSZ=$(expr $LOGSZ + 1) + fi + test_run_cmd_local "$IPERF3 -fm $CLIENT_OPTS -p $PORT -c $ADDR" true + + sleep $SLEEPTIME + + sresults=$(grep -E "sender" ${CMDLOG} | awk '{print $7}') + rresults=$(grep -E "receiver" ${CMDLOG} | awk '{print $7}') + units=$(grep -E "sender|receiver" ${CMDLOG} | awk '{print $8}' |head -1) + + if [[ $MODE == "baseline" ]]; then + read -r -a sbaseline_results <<< $sresults + read -r -a rbaseline_results <<< $rresults + echo "" > ${CMDLOG} + else + read -r -a stest_results <<< $sresults + read -r -a rtest_results <<< $rresults + pkill -TERM bpftune + fi + sleep $SLEEPTIME + done + + wmem_post=($(sysctl -n net.ipv4.tcp_wmem)) + sysctl -w net.ipv4.tcp_wmem="${wmem_orig[0]} ${wmem_orig[1]} ${wmem_orig[2]}" + if [[ $MODE == "test" ]]; then + if [[ "${wmem_post[2]}" -eq ${wmem_orig[1]} ]]; then + echo "wmem before ${wmem_orig[1]} ; after ${wmem_post[2]}" + else + test_cleanup + fi + fi + printf "Results sender (${units}): " + for (( i=0; i < ${#sbaseline_results[@]}; i++ )) + do + sbase=$(roundup ${sbaseline_results[$i]}) + stest=$(roundup ${stest_results[$i]}) + if [[ ${sbase} -gt ${stest} ]]; then + 
bold "Warning: baseline (${sbase}) > test (${stest})" + else + echo "baseline (${sbase}) < test (${stest})" + fi + done + printf "Results receiver (${units}): " + for (( i=0; i < ${#rbaseline_results[@]}; i++ )) + do + rbase=$(roundup ${rbaseline_results[$i]}) + rtest=$(roundup ${rtest_results[$i]}) + if [[ ${rbase} -gt ${rtest} ]]; then + bold "Warning: baseline (${rbase}) > test (${rtest})" + else + echo "baseline (${rbase}) < test (${rtest})" + fi + done + + grep "Rolling back" $LOGFILE + + test_pass + + test_cleanup + done +done + +test_exit diff --git a/test/rollback_test.sh b/test/rollback_test.sh new file mode 100644 index 0000000..7c4ca21 --- /dev/null +++ b/test/rollback_test.sh @@ -0,0 +1,124 @@ +#!/usr/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# Copyright (c) 2023, Oracle and/or its affiliates. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public +# License v2 as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this program; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. +# + +# run iperf3 test with low wmem max, ensure tuner increases it but it is +# rolled back on exit. + +PORT=5201 + +. 
./test_lib.sh + +SLEEPTIME=1 +TIMEOUT=30 + +for FAMILY in ipv4 ipv6 ; do + + for CLIENT_OPTS in "" ; do + case $FAMILY in + ipv4) + ADDR=$VETH1_IPV4 + ;; + ipv6) + ADDR=$VETH1_IPV6 + ;; + esac + + test_start "$0|rollback test to $ADDR:$PORT $FAMILY opts $CLIENT_OPTS $LATENCY" + + wmem_orig=($(sysctl -n net.ipv4.tcp_wmem)) + + test_setup true + + sysctl -w net.ipv4.tcp_wmem="${wmem_orig[0]} ${wmem_orig[1]} ${wmem_orig[1]}" + + declare -A results + for MODE in baseline test ; do + + echo "Running ${MODE}..." + test_run_cmd_local "ip netns exec $NETNS $IPERF3 -s -p $PORT -1 &" + if [[ $MODE != "baseline" ]]; then + test_run_cmd_local "$BPFTUNE -R &" + sleep $SETUPTIME + else + LOGSZ=$(wc -l $LOGFILE | awk '{print $1}') + LOGSZ=$(expr $LOGSZ + 1) + fi + test_run_cmd_local "$IPERF3 -fm $CLIENT_OPTS -p $PORT -c $ADDR" true + + sleep $SLEEPTIME + + sresults=$(grep -E "sender" ${CMDLOG} | awk '{print $7}') + rresults=$(grep -E "receiver" ${CMDLOG} | awk '{print $7}') + units=$(grep -E "sender|receiver" ${CMDLOG} | awk '{print $8}' |head -1) + + if [[ $MODE == "baseline" ]]; then + read -r -a sbaseline_results <<< $sresults + read -r -a rbaseline_results <<< $rresults + echo "" > ${CMDLOG} + else + read -r -a stest_results <<< $sresults + read -r -a rtest_results <<< $rresults + pkill -TERM bpftune + fi + sleep $SLEEPTIME + done + + wmem_post=($(sysctl -n net.ipv4.tcp_wmem)) + sysctl -w net.ipv4.tcp_wmem="${wmem_orig[0]} ${wmem_orig[1]} ${wmem_orig[2]}" + if [[ $MODE == "test" ]]; then + if [[ "${wmem_post[2]}" -eq ${wmem_orig[1]} ]]; then + echo "wmem before ${wmem_orig[1]} ; after ${wmem_post[2]}" + else + test_cleanup + fi + fi + printf "Results sender (${units}): " + for (( i=0; i < ${#sbaseline_results[@]}; i++ )) + do + sbase=$(roundup ${sbaseline_results[$i]}) + stest=$(roundup ${stest_results[$i]}) + if [[ ${sbase} -gt ${stest} ]]; then + bold "Warning: baseline (${sbase}) > test (${stest})" + else + echo "baseline (${sbase}) < test (${stest})" + fi + done + printf 
"Results receiver (${units}): " + for (( i=0; i < ${#rbaseline_results[@]}; i++ )) + do + rbase=$(roundup ${rbaseline_results[$i]}) + rtest=$(roundup ${rtest_results[$i]}) + if [[ ${rbase} -gt ${rtest} ]]; then + bold "Warning: baseline (${rbase}) > test (${rtest})" + else + echo "baseline (${rbase}) < test (${rtest})" + fi + done + + grep "Rolling back" $LOGFILE + + test_pass + + test_cleanup + done +done + +test_exit