Skip to content

Commit ea6d962

Browse files
committed
rcutorture: Judge RCU priority boosting on grace periods, not callbacks
Currently, rcutorture's testing of RCU priority boosting insists not only that grace periods complete, but also that callbacks be invoked. Although this is in fact what the user would want, ensuring that there is sufficient CPU bandwidth devoted to callback execution is in fact the user's responsibility. One could argue that rcutorture can take on that responsibility, which is true in theory. But in practice, ensuring sufficient CPU bandwidth to ksoftirqd, any rcuc kthreads, and any rcuo kthreads is not particularly consistent with rcutorture's main job, that of stress-testing RCU. In addition, if the system administrator (say) makes very poor choices when pinning rcuo kthreads and then runs rcutorture, there really isn't much rcutorture can do. Besides, RCU priority boosting only boosts lagging readers, not all the machinery required to invoke callbacks in a timely fashion. This commit therefore switches rcutorture's evaluation of RCU priority boosting from callback execution to grace-period completion by using the new start_poll_synchronize_rcu() and poll_state_synchronize_rcu() functions. When rcutorture is built in (as in when there is no innocent workload to inconvenience), the ksoftirqd kthreads are boosted to real-time priority 2 in order to allow timeouts to work properly in the face of rcutorture's testing of RCU priority boosting. Indeed, it is not as easy as it looks to create a reliable test of RCU priority boosting without destroying the rest of the kernel! Signed-off-by: Paul E. McKenney <[email protected]>
1 parent 226dd39 commit ea6d962

File tree

1 file changed

+51
-60
lines changed

1 file changed

+51
-60
lines changed

kernel/rcu/rcutorture.c

Lines changed: 51 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -245,12 +245,6 @@ static const char *rcu_torture_writer_state_getname(void)
245245
return rcu_torture_writer_state_names[i];
246246
}
247247

248-
#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_PREEMPT_RT)
249-
# define rcu_can_boost() 1
250-
#else
251-
# define rcu_can_boost() 0
252-
#endif
253-
254248
#ifdef CONFIG_RCU_TRACE
255249
static u64 notrace rcu_trace_clock_local(void)
256250
{
@@ -511,7 +505,7 @@ static struct rcu_torture_ops rcu_ops = {
511505
.gp_kthread_dbg = show_rcu_gp_kthreads,
512506
.stall_dur = rcu_jiffies_till_stall_check,
513507
.irq_capable = 1,
514-
.can_boost = rcu_can_boost(),
508+
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
515509
.extendables = RCUTORTURE_MAX_EXTEND,
516510
.name = "rcu"
517511
};
@@ -891,25 +885,11 @@ static unsigned long rcutorture_seq_diff(unsigned long new, unsigned long old)
891885

892886
/*
893887
* RCU torture priority-boost testing. Runs one real-time thread per
894-
* CPU for moderate bursts, repeatedly registering RCU callbacks and
895-
* spinning waiting for them to be invoked. If a given callback takes
896-
* too long to be invoked, we assume that priority inversion has occurred.
888+
* CPU for moderate bursts, repeatedly starting grace periods and waiting
889+
* for them to complete. If a given grace period takes too long, we assume
890+
* that priority inversion has occurred.
897891
*/
898892

899-
struct rcu_boost_inflight {
900-
struct rcu_head rcu;
901-
int inflight;
902-
};
903-
904-
static void rcu_torture_boost_cb(struct rcu_head *head)
905-
{
906-
struct rcu_boost_inflight *rbip =
907-
container_of(head, struct rcu_boost_inflight, rcu);
908-
909-
/* Ensure RCU-core accesses precede clearing ->inflight */
910-
smp_store_release(&rbip->inflight, 0);
911-
}
912-
913893
static int old_rt_runtime = -1;
914894

915895
static void rcu_torture_disable_rt_throttle(void)
@@ -936,15 +916,18 @@ static void rcu_torture_enable_rt_throttle(void)
936916
old_rt_runtime = -1;
937917
}
938918

939-
static bool rcu_torture_boost_failed(unsigned long start, unsigned long end)
919+
static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long start, unsigned long end)
940920
{
941921
static int dbg_done;
942922

943923
if (end - start > test_boost_duration * HZ - HZ / 2) {
944924
VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed");
945925
n_rcu_torture_boost_failure++;
946-
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg)
926+
if (!xchg(&dbg_done, 1) && cur_ops->gp_kthread_dbg) {
927+
pr_info("Boost inversion thread ->rt_priority %u gp_state %lu jiffies %lu\n",
928+
current->rt_priority, gp_state, end - start);
947929
cur_ops->gp_kthread_dbg();
930+
}
948931

949932
return true; /* failed */
950933
}
@@ -954,21 +937,20 @@ static bool rcu_torture_boost_failed(unsigned long start, unsigned long end)
954937

955938
static int rcu_torture_boost(void *arg)
956939
{
957-
unsigned long call_rcu_time;
958940
unsigned long endtime;
941+
unsigned long gp_state;
942+
unsigned long gp_state_time;
959943
unsigned long oldstarttime;
960-
struct rcu_boost_inflight rbi = { .inflight = 0 };
961944

962945
VERBOSE_TOROUT_STRING("rcu_torture_boost started");
963946

964947
/* Set real-time priority. */
965948
sched_set_fifo_low(current);
966949

967-
init_rcu_head_on_stack(&rbi.rcu);
968950
/* Each pass through the following loop does one boost-test cycle. */
969951
do {
970952
bool failed = false; // Test failed already in this test interval
971-
bool firsttime = true;
953+
bool gp_initiated = false;
972954

973955
/* Increment n_rcu_torture_boosts once per boost-test */
974956
while (!kthread_should_stop()) {
@@ -992,33 +974,33 @@ static int rcu_torture_boost(void *arg)
992974
goto checkwait;
993975
}
994976

995-
/* Do one boost-test interval. */
977+
// Do one boost-test interval.
996978
endtime = oldstarttime + test_boost_duration * HZ;
997979
while (time_before(jiffies, endtime)) {
998-
/* If we don't have a callback in flight, post one. */
999-
if (!smp_load_acquire(&rbi.inflight)) {
1000-
/* RCU core before ->inflight = 1. */
1001-
smp_store_release(&rbi.inflight, 1);
1002-
cur_ops->call(&rbi.rcu, rcu_torture_boost_cb);
1003-
/* Check if the boost test failed */
1004-
if (!firsttime && !failed)
1005-
failed = rcu_torture_boost_failed(call_rcu_time, jiffies);
1006-
call_rcu_time = jiffies;
1007-
firsttime = false;
980+
// Has current GP gone too long?
981+
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
982+
failed = rcu_torture_boost_failed(gp_state, gp_state_time, jiffies);
983+
// If we don't have a grace period in flight, start one.
984+
if (!gp_initiated || cur_ops->poll_gp_state(gp_state)) {
985+
gp_state = cur_ops->start_gp_poll();
986+
gp_initiated = true;
987+
gp_state_time = jiffies;
1008988
}
1009-
if (stutter_wait("rcu_torture_boost"))
989+
if (stutter_wait("rcu_torture_boost")) {
1010990
sched_set_fifo_low(current);
991+
// If the grace period already ended,
992+
// we don't know when that happened, so
993+
// start over.
994+
if (cur_ops->poll_gp_state(gp_state))
995+
gp_initiated = false;
996+
}
1011997
if (torture_must_stop())
1012998
goto checkwait;
1013999
}
10141000

1015-
/*
1016-
* If boost never happened, then inflight will always be 1, in
1017-
* this case the boost check would never happen in the above
1018-
* loop so do another one here.
1019-
*/
1020-
if (!firsttime && !failed && smp_load_acquire(&rbi.inflight))
1021-
rcu_torture_boost_failed(call_rcu_time, jiffies);
1001+
// In case the grace period extended beyond the end of the loop.
1002+
if (gp_initiated && !failed && !cur_ops->poll_gp_state(gp_state))
1003+
rcu_torture_boost_failed(gp_state, gp_state_time, jiffies);
10221004

10231005
/*
10241006
* Set the start time of the next test interval.
@@ -1027,11 +1009,9 @@ static int rcu_torture_boost(void *arg)
10271009
* interval. Besides, we are running at RT priority,
10281010
* so delays should be relatively rare.
10291011
*/
1030-
while (oldstarttime == boost_starttime &&
1031-
!kthread_should_stop()) {
1012+
while (oldstarttime == boost_starttime && !kthread_should_stop()) {
10321013
if (mutex_trylock(&boost_mutex)) {
1033-
boost_starttime = jiffies +
1034-
test_boost_interval * HZ;
1014+
boost_starttime = jiffies + test_boost_interval * HZ;
10351015
mutex_unlock(&boost_mutex);
10361016
break;
10371017
}
@@ -1043,15 +1023,11 @@ checkwait: if (stutter_wait("rcu_torture_boost"))
10431023
sched_set_fifo_low(current);
10441024
} while (!torture_must_stop());
10451025

1046-
while (smp_load_acquire(&rbi.inflight))
1047-
schedule_timeout_uninterruptible(1); // rcu_barrier() deadlocks.
1048-
10491026
/* Clean up and exit. */
1050-
while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) {
1027+
while (!kthread_should_stop()) {
10511028
torture_shutdown_absorb("rcu_torture_boost");
10521029
schedule_timeout_uninterruptible(1);
10531030
}
1054-
destroy_rcu_head_on_stack(&rbi.rcu);
10551031
torture_kthread_stopping("rcu_torture_boost");
10561032
return 0;
10571033
}
@@ -2643,15 +2619,15 @@ static bool rcu_torture_can_boost(void)
26432619

26442620
if (!(test_boost == 1 && cur_ops->can_boost) && test_boost != 2)
26452621
return false;
2646-
if (!cur_ops->call)
2622+
if (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)
26472623
return false;
26482624

26492625
prio = rcu_get_gp_kthreads_prio();
26502626
if (!prio)
26512627
return false;
26522628

26532629
if (prio < 2) {
2654-
if (boost_warn_once == 1)
2630+
if (boost_warn_once == 1)
26552631
return false;
26562632

26572633
pr_alert("%s: WARN: RCU kthread priority too low to test boosting. Skipping RCU boost test. Try passing rcutree.kthread_prio > 1 on the kernel command line.\n", KBUILD_MODNAME);
@@ -3129,6 +3105,21 @@ rcu_torture_init(void)
31293105
if (firsterr < 0)
31303106
goto unwind;
31313107
rcutor_hp = firsterr;
3108+
3109+
// Testing RCU priority boosting requires rcutorture do
3110+
// some serious abuse. Counter this by running ksoftirqd
3111+
// at higher priority.
3112+
if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) {
3113+
for_each_online_cpu(cpu) {
3114+
struct sched_param sp;
3115+
struct task_struct *t;
3116+
3117+
t = per_cpu(ksoftirqd, cpu);
3118+
WARN_ON_ONCE(!t);
3119+
sp.sched_priority = 2;
3120+
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
3121+
}
3122+
}
31323123
}
31333124
shutdown_jiffies = jiffies + shutdown_secs * HZ;
31343125
firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);

0 commit comments

Comments
 (0)