aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2015-06-05 19:01:11 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2015-06-05 19:01:11 +1000
commita1f78abfff6484e0d9c98079a0911545efe6791c (patch)
tree05f6e5c2115bc8234f6fd13355e744d959408c23
parentb86848b29a691a1552c2042b6d63d11149acfb91 (diff)
parentd254f32b7c0376bd2b9174df41333824fb734804 (diff)
downloadlinux-next-a1f78abfff6484e0d9c98079a0911545efe6791c.tar.gz
Merge remote-tracking branch 'rcu/rcu/next'
-rw-r--r--Documentation/RCU/rcu_dereference.txt2
-rw-r--r--Documentation/kernel-parameters.txt35
-rw-r--r--Documentation/memory-barriers.txt2
-rw-r--r--arch/arm/kernel/smp.c6
-rw-r--r--include/linux/rcupdate.h44
-rw-r--r--init/Kconfig9
-rw-r--r--kernel/rcu/rcutorture.c19
-rw-r--r--kernel/rcu/tree.c18
-rw-r--r--kernel/rcu/tree.h6
-rw-r--r--kernel/rcu/update.c49
-rw-r--r--kernel/time/Kconfig2
11 files changed, 109 insertions, 83 deletions
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
index 1e6c0da994f544..c0bf2441a2baf5 100644
--- a/Documentation/RCU/rcu_dereference.txt
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -28,7 +28,7 @@ o You must use one of the rcu_dereference() family of primitives
o Avoid cancellation when using the "+" and "-" infix arithmetic
operators. For example, for a given variable "x", avoid
"(x-x)". There are similar arithmetic pitfalls from other
- arithmetic operatiors, such as "(x*0)", "(x/(x+1))" or "(x%1)".
+ arithmetic operators, such as "(x*0)", "(x/(x+1))" or "(x%1)".
The compiler is within its rights to substitute zero for all of
these expressions, so that subsequent accesses no longer depend
on the rcu_dereference(), again possibly resulting in bugs due
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 136e6223a6c124..67709a5f1af48f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3106,22 +3106,35 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
in a given burst of a callback-flood test.
rcutorture.fqs_duration= [KNL]
- Set duration of force_quiescent_state bursts.
+ Set duration of force_quiescent_state bursts
+ in microseconds.
rcutorture.fqs_holdoff= [KNL]
- Set holdoff time within force_quiescent_state bursts.
+ Set holdoff time within force_quiescent_state bursts
+ in microseconds.
rcutorture.fqs_stutter= [KNL]
- Set wait time between force_quiescent_state bursts.
+ Set wait time between force_quiescent_state bursts
+ in seconds.
+
+ rcutorture.gp_cond= [KNL]
+ Use conditional/asynchronous update-side
+ primitives, if available.
rcutorture.gp_exp= [KNL]
- Use expedited update-side primitives.
+ Use expedited update-side primitives, if available.
rcutorture.gp_normal= [KNL]
- Use normal (non-expedited) update-side primitives.
- If both gp_exp and gp_normal are set, do both.
- If neither gp_exp nor gp_normal are set, still
- do both.
+ Use normal (non-expedited) asynchronous
+ update-side primitives, if available.
+
+ rcutorture.gp_sync= [KNL]
+ Use normal (non-expedited) synchronous
+ update-side primitives, if available. If all
+ of rcutorture.gp_cond=, rcutorture.gp_exp=,
+ rcutorture.gp_normal=, and rcutorture.gp_sync=
+ are zero, rcutorture acts as if is interpreted
+ they are all non-zero.
rcutorture.n_barrier_cbs= [KNL]
Set callbacks/threads for rcu_barrier() testing.
@@ -3148,9 +3161,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Set time (s) between CPU-hotplug operations, or
zero to disable CPU-hotplug testing.
- rcutorture.torture_runnable= [BOOT]
- Start rcutorture running at boot time.
-
rcutorture.shuffle_interval= [KNL]
Set task-shuffle interval (s). Shuffling tasks
allows some CPUs to go into dyntick-idle mode
@@ -3191,6 +3201,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
Test RCU's dyntick-idle handling. See also the
rcutorture.shuffle_interval parameter.
+ rcutorture.torture_runnable= [BOOT]
+ Start rcutorture running at boot time.
+
rcutorture.torture_type= [KNL]
Specify the RCU implementation to test.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 13feb697271f0a..3d06f98b2ff2c3 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -746,7 +746,7 @@ You must also be careful not to rely too much on boolean short-circuit
evaluation. Consider this example:
q = READ_ONCE_CTRL(a);
- if (a || 1 > 0)
+ if (q || 1 > 0)
ACCESS_ONCE(b) = 1;
Because the first condition cannot fault and the second condition is
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 90dfbedfbfb852..f4e0e14e700ae4 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -220,15 +220,13 @@ int __cpu_disable(void)
return 0;
}
-static DECLARE_COMPLETION(cpu_died);
-
/*
* called on the thread which is asking for a CPU to be shutdown -
* waits until shutdown has completed, or it is timed out.
*/
void __cpu_die(unsigned int cpu)
{
- if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) {
+ if (!cpu_wait_death(cpu, 5)) {
pr_err("CPU%u: cpu didn't die\n", cpu);
return;
}
@@ -274,7 +272,7 @@ void __ref cpu_die(void)
* this returns, power and/or clocks can be removed at any point
* from this CPU and its cache by platform_cpu_kill().
*/
- complete(&cpu_died);
+ (void)cpu_report_death();
/*
* Ensure that the cache lines associated with that completion are
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 4cf5f51b4c9c43..def6d45ad61c02 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -309,7 +309,7 @@ static inline void rcu_sysrq_end(void)
}
#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_NO_HZ_FULL
void rcu_user_enter(void);
void rcu_user_exit(void);
#else
@@ -317,7 +317,7 @@ static inline void rcu_user_enter(void) { }
static inline void rcu_user_exit(void) { }
static inline void rcu_user_hooks_switch(struct task_struct *prev,
struct task_struct *next) { }
-#endif /* CONFIG_RCU_USER_QS */
+#endif /* CONFIG_NO_HZ_FULL */
#ifdef CONFIG_RCU_NOCB_CPU
void rcu_init_nohz(void);
@@ -469,46 +469,10 @@ int rcu_read_lock_bh_held(void);
* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
* RCU-sched read-side critical section. In absence of
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
- * critical section unless it can prove otherwise. Note that disabling
- * of preemption (including disabling irqs) counts as an RCU-sched
- * read-side critical section. This is useful for debug checks in functions
- * that required that they be called within an RCU-sched read-side
- * critical section.
- *
- * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
- * and while lockdep is disabled.
- *
- * Note that if the CPU is in the idle loop from an RCU point of
- * view (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
- * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
- * that are in such a section, considering these as in extended quiescent
- * state, so such a CPU is effectively never in an RCU read-side critical
- * section regardless of what RCU primitives it invokes. This state of
- * affairs is required --- we need to keep an RCU-free window in idle
- * where the CPU may possibly enter into low power mode. This way we can
- * notice an extended quiescent state to other CPUs that started a grace
- * period. Otherwise we would delay any grace period as long as we run in
- * the idle task.
- *
- * Similarly, we avoid claiming an SRCU read lock held if the current
- * CPU is offline.
+ * critical section unless it can prove otherwise.
*/
#ifdef CONFIG_PREEMPT_COUNT
-static inline int rcu_read_lock_sched_held(void)
-{
- int lockdep_opinion = 0;
-
- if (!debug_lockdep_rcu_enabled())
- return 1;
- if (!rcu_is_watching())
- return 0;
- if (!rcu_lockdep_current_cpu_online())
- return 0;
- if (debug_locks)
- lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
- return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
-}
+int rcu_read_lock_sched_held(void);
#else /* #ifdef CONFIG_PREEMPT_COUNT */
static inline int rcu_read_lock_sched_held(void)
{
diff --git a/init/Kconfig b/init/Kconfig
index 194bc7c1df93ca..9fbf6668a14b19 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -537,15 +537,6 @@ config RCU_STALL_COMMON
config CONTEXT_TRACKING
bool
-config RCU_USER_QS
- bool
- help
- This option sets hooks on kernel / userspace boundaries and
- puts RCU in extended quiescent state when the CPU runs in
- userspace. It means that when a CPU runs in userspace, it is
- excluded from the global RCU state machine and thus doesn't
- try to keep the timer tick on for RCU.
-
config CONTEXT_TRACKING_FORCE
bool "Force context tracking"
depends on CONTEXT_TRACKING
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 59e32684c23b58..1cead7806ca607 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1507,7 +1507,7 @@ static int rcu_torture_barrier_init(void)
int i;
int ret;
- if (n_barrier_cbs == 0)
+ if (n_barrier_cbs <= 0)
return 0;
if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) {
pr_alert("%s" TORTURE_FLAG
@@ -1786,12 +1786,15 @@ rcu_torture_init(void)
writer_task);
if (firsterr)
goto unwind;
- fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]),
- GFP_KERNEL);
- if (fakewriter_tasks == NULL) {
- VERBOSE_TOROUT_ERRSTRING("out of memory");
- firsterr = -ENOMEM;
- goto unwind;
+ if (nfakewriters > 0) {
+ fakewriter_tasks = kzalloc(nfakewriters *
+ sizeof(fakewriter_tasks[0]),
+ GFP_KERNEL);
+ if (fakewriter_tasks == NULL) {
+ VERBOSE_TOROUT_ERRSTRING("out of memory");
+ firsterr = -ENOMEM;
+ goto unwind;
+ }
}
for (i = 0; i < nfakewriters; i++) {
firsterr = torture_create_kthread(rcu_torture_fakewriter,
@@ -1818,7 +1821,7 @@ rcu_torture_init(void)
if (firsterr)
goto unwind;
}
- if (test_no_idle_hz) {
+ if (test_no_idle_hz && shuffle_interval > 0) {
firsterr = torture_shuffle_init(shuffle_interval * HZ);
if (firsterr)
goto unwind;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 65137bc28b2b36..cdb66d4ccc008e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -701,7 +701,7 @@ void rcu_idle_enter(void)
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_NO_HZ_FULL
/**
* rcu_user_enter - inform RCU that we are resuming userspace.
*
@@ -714,7 +714,7 @@ void rcu_user_enter(void)
{
rcu_eqs_enter(1);
}
-#endif /* CONFIG_RCU_USER_QS */
+#endif /* CONFIG_NO_HZ_FULL */
/**
* rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -828,7 +828,7 @@ void rcu_idle_exit(void)
}
EXPORT_SYMBOL_GPL(rcu_idle_exit);
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_NO_HZ_FULL
/**
* rcu_user_exit - inform RCU that we are exiting userspace.
*
@@ -839,7 +839,7 @@ void rcu_user_exit(void)
{
rcu_eqs_exit(1);
}
-#endif /* CONFIG_RCU_USER_QS */
+#endif /* CONFIG_NO_HZ_FULL */
/**
* rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -1178,9 +1178,11 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
j = jiffies;
gpa = READ_ONCE(rsp->gp_activity);
if (j - gpa > 2 * HZ)
- pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n",
+ pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x s%d ->state=%#lx\n",
rsp->name, j - gpa,
- rsp->gpnum, rsp->completed, rsp->gp_flags);
+ rsp->gpnum, rsp->completed,
+ rsp->gp_flags, rsp->gp_state,
+ rsp->gp_kthread ? rsp->gp_kthread->state : 0);
}
/*
@@ -2041,6 +2043,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
wait_event_interruptible(rsp->gp_wq,
READ_ONCE(rsp->gp_flags) &
RCU_GP_FLAG_INIT);
+ rsp->gp_state = RCU_GP_DONE_GPS;
/* Locking provides needed memory barrier. */
if (rcu_gp_init(rsp))
break;
@@ -2073,6 +2076,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
(!READ_ONCE(rnp->qsmask) &&
!rcu_preempt_blocked_readers_cgp(rnp)),
j);
+ rsp->gp_state = RCU_GP_DONE_FQS;
/* Locking provides needed memory barriers. */
/* If grace period done, leave loop. */
if (!READ_ONCE(rnp->qsmask) &&
@@ -2110,7 +2114,9 @@ static int __noreturn rcu_gp_kthread(void *arg)
}
/* Handle grace-period end. */
+ rsp->gp_state = RCU_GP_CLEANUP;
rcu_gp_cleanup(rsp);
+ rsp->gp_state = RCU_GP_CLEANED;
}
}
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4adb7ca0bf47a2..f1f4784f91074d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -527,7 +527,11 @@ struct rcu_state {
/* Values for rcu_state structure's gp_flags field. */
#define RCU_GP_WAIT_INIT 0 /* Initial state. */
#define RCU_GP_WAIT_GPS 1 /* Wait for grace-period start. */
-#define RCU_GP_WAIT_FQS 2 /* Wait for force-quiescent-state time. */
+#define RCU_GP_DONE_GPS 2 /* Wait done for grace-period start. */
+#define RCU_GP_WAIT_FQS 3 /* Wait for force-quiescent-state time. */
+#define RCU_GP_DONE_FQS 4 /* Wait done for force-quiescent-state time. */
+#define RCU_GP_CLEANUP 5 /* Grace-period cleanup started. */
+#define RCU_GP_CLEANED 6 /* Grace-period cleanup complete. */
extern struct list_head rcu_struct_flavors;
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index afaecb7a799af2..fec5f48b8860e9 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,6 +62,55 @@ MODULE_ALIAS("rcupdate");
module_param(rcu_expedited, int, 0);
+#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT)
+/**
+ * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
+ * RCU-sched read-side critical section. In absence of
+ * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
+ * critical section unless it can prove otherwise. Note that disabling
+ * of preemption (including disabling irqs) counts as an RCU-sched
+ * read-side critical section. This is useful for debug checks in functions
+ * that required that they be called within an RCU-sched read-side
+ * critical section.
+ *
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that if the CPU is in the idle loop from an RCU point of
+ * view (ie: that we are in the section between rcu_idle_enter() and
+ * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
+ * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs
+ * that are in such a section, considering these as in extended quiescent
+ * state, so such a CPU is effectively never in an RCU read-side critical
+ * section regardless of what RCU primitives it invokes. This state of
+ * affairs is required --- we need to keep an RCU-free window in idle
+ * where the CPU may possibly enter into low power mode. This way we can
+ * notice an extended quiescent state to other CPUs that started a grace
+ * period. Otherwise we would delay any grace period as long as we run in
+ * the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
+ */
+int rcu_read_lock_sched_held(void)
+{
+ int lockdep_opinion = 0;
+
+ if (!debug_lockdep_rcu_enabled())
+ return 1;
+ if (!rcu_is_watching())
+ return 0;
+ if (!rcu_lockdep_current_cpu_online())
+ return 0;
+ if (debug_locks)
+ lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
+ return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
+}
+EXPORT_SYMBOL(rcu_read_lock_sched_held);
+#endif
+
#ifndef CONFIG_TINY_RCU
static atomic_t rcu_expedited_nesting =
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 579ce1b929afde..4008d9f95dd724 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -92,12 +92,10 @@ config NO_HZ_FULL
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
# We need at least one periodic CPU for timekeeping
depends on SMP
- # RCU_USER_QS dependency
depends on HAVE_CONTEXT_TRACKING
# VIRT_CPU_ACCOUNTING_GEN dependency
depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
select NO_HZ_COMMON
- select RCU_USER_QS
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
select IRQ_WORK