aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2012-03-07 20:51:03 +0100
committerSebastian Andrzej Siewior <bigeasy@linutronix.de>2016-02-13 00:36:19 +0100
commit1d1ff9b66de17856eea601244f708317255254d3 (patch)
treec36d93ae7d1507110bf81de94ab23cdb9e0951bc
parentb4f830750ef6c90ae2ed2a60a5c90e489773e712 (diff)
downloadrt-linux-1d1ff9b66de17856eea601244f708317255254d3.tar.gz
rt: Introduce cpu_chill()
Retry loops on RT might loop forever when the modifying side was preempted. Add cpu_chill() to replace cpu_relax(). cpu_chill() defaults to cpu_relax() for non RT. On RT it puts the looping task to sleep for a tick so the preempted task can make progress. Steven Rostedt changed it to use a hrtimer instead of msleep(): | |Ulrich Obergfell pointed out that cpu_chill() calls msleep() which is woken |up by the ksoftirqd running the TIMER softirq. But as the cpu_chill() is |called from softirq context, it may block the ksoftirqd() from running, in |which case, it may never wake up the msleep() causing the deadlock. | |I checked the vmcore, and irq/74-qla2xxx is stuck in the msleep() call, |running on CPU 8. The one ksoftirqd that is stuck, happens to be the one that |runs on CPU 8, and it is blocked on a lock held by irq/74-qla2xxx. As that |ksoftirqd is the one that will wake up irq/74-qla2xxx, and it happens to be |blocked on a lock that irq/74-qla2xxx holds, we have our deadlock. | |The solution is not to convert the cpu_chill() back to a cpu_relax() as that |will re-create a possible live lock that the cpu_chill() fixed earlier, and may |also leave this bug open on other softirqs. The fix is to remove the |dependency on ksoftirqd from cpu_chill(). That is, instead of calling |msleep() that requires ksoftirqd to wake it up, use the |hrtimer_nanosleep() code that does the wakeup from hard irq context. | ||Looks to be the lock of the block softirq. I don't have the core dump ||anymore, but from what I could tell the ksoftirqd was blocked on the ||block softirq lock, where the block softirq handler did a msleep ||(called by the qla2xxx interrupt handler). || ||Looking at trigger_softirq() in block/blk-softirq.c, it can do a ||smp_callfunction() to another cpu to run the block softirq. If that ||happens to be the cpu where the qla2xx irq handler is doing the block ||softirq and is in a middle of a msleep(), I believe the ksoftirqd will ||try to run the softirq. If it does that, then BOOM, it's deadlocked ||because the ksoftirqd will never run the timer softirq either. | ||I should have also stated that it was only one lock that was involved. ||But the lock owner was doing a msleep() that requires a wakeup by ||ksoftirqd to continue. If ksoftirqd happens to be blocked on a lock ||held by the msleep() caller, then you have your deadlock. || ||It's best not to have any softirqs going to sleep requiring another ||softirq to wake it up. Note, if we ever require a timer softirq to do a ||cpu_chill() it will most definitely hit this deadlock. + bigeasy: add PF_NOFREEZE: | [....] Waiting for /dev to be fully populated... | ===================================== | [ BUG: udevd/229 still has locks held! ] | 3.12.11-rt17 #23 Not tainted | ------------------------------------- | 1 lock held by udevd/229: | #0: (&type->i_mutex_dir_key#2){+.+.+.}, at: lookup_slow+0x28/0x98 | | stack backtrace: | CPU: 0 PID: 229 Comm: udevd Not tainted 3.12.11-rt17 #23 | (unwind_backtrace+0x0/0xf8) from (show_stack+0x10/0x14) | (show_stack+0x10/0x14) from (dump_stack+0x74/0xbc) | (dump_stack+0x74/0xbc) from (do_nanosleep+0x120/0x160) | (do_nanosleep+0x120/0x160) from (hrtimer_nanosleep+0x90/0x110) | (hrtimer_nanosleep+0x90/0x110) from (cpu_chill+0x30/0x38) | (cpu_chill+0x30/0x38) from (dentry_kill+0x158/0x1ec) | (dentry_kill+0x158/0x1ec) from (dput+0x74/0x15c) | (dput+0x74/0x15c) from (lookup_real+0x4c/0x50) | (lookup_real+0x4c/0x50) from (__lookup_hash+0x34/0x44) | (__lookup_hash+0x34/0x44) from (lookup_slow+0x38/0x98) | (lookup_slow+0x38/0x98) from (path_lookupat+0x208/0x7fc) | (path_lookupat+0x208/0x7fc) from (filename_lookup+0x20/0x60) | (filename_lookup+0x20/0x60) from (user_path_at_empty+0x50/0x7c) | (user_path_at_empty+0x50/0x7c) from (user_path_at+0x14/0x1c) | (user_path_at+0x14/0x1c) from (vfs_fstatat+0x48/0x94) | (vfs_fstatat+0x48/0x94) from (SyS_stat64+0x14/0x30) | (SyS_stat64+0x14/0x30) from (ret_fast_syscall+0x0/0x48) Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-rw-r--r--include/linux/delay.h6
-rw-r--r--kernel/time/hrtimer.c19
2 files changed, 25 insertions, 0 deletions
diff --git a/include/linux/delay.h b/include/linux/delay.h
index a6ecb34cf547d..37caab306336a 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds)
msleep(seconds * 1000);
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+extern void cpu_chill(void);
+#else
+# define cpu_chill() cpu_relax()
+#endif
+
#endif /* defined(_LINUX_DELAY_H) */
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 774f2deec0bd2..5bdff2bbe134b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1775,6 +1775,25 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * Sleep for 1 ms in hope whoever holds what we want will let it go.
+ */
+void cpu_chill(void)
+{
+ struct timespec tu = {
+ .tv_nsec = NSEC_PER_MSEC,
+ };
+ unsigned int freeze_flag = current->flags & PF_NOFREEZE;
+
+ current->flags |= PF_NOFREEZE;
+ hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ if (!freeze_flag)
+ current->flags &= ~PF_NOFREEZE;
+}
+EXPORT_SYMBOL(cpu_chill);
+#endif
+
/*
* Functions related to boot-time initialization:
*/