author    Linus Torvalds <torvalds@linux-foundation.org>  2023-02-23 09:28:37 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2023-02-23 09:28:37 -0800
commit    525445efacdfeed71329ce8bc5f558859a894b8b (patch)
tree      6cdf76560eae119b561bade1846dffa3b182da21
parent    192a5e0a19712a079f456954c203ce9dd2b889fa (diff)
parent    344da544f177f919cf6919e5abcd388f27aa53db (diff)
download  linux-525445efacdfeed71329ce8bc5f558859a894b8b.tar.gz
Merge tag 'nmi.2023.02.14a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu

Pull x86 NMI diagnostics from Paul McKenney:
 "Add diagnostics to the x86 NMI handler to help detect NMI-handler bugs
  on the one hand and failing hardware on the other"

* tag 'nmi.2023.02.14a' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu:
  x86/nmi: Print reasons why backtrace NMIs are ignored
  x86/nmi: Accumulate NMI-progress evidence in exc_nmi()
-rw-r--r--  arch/x86/kernel/nmi.c   108
-rw-r--r--  include/linux/nmi.h       8
-rw-r--r--  lib/Kconfig.debug        11
-rw-r--r--  lib/nmi_backtrace.c       2
4 files changed, 128 insertions(+), 1 deletion(-)
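
For orientation: the two commits being merged pair a per-CPU snapshot of
NMI-progress counters, taken when the backtrace NMIs are sent, with a later
check that prints why any unresponsive CPU stayed silent. A minimal
caller-side sketch of that pairing follows; the nmi_backtrace_stall_*()
hooks are the ones added below, while send_and_wait_for_backtraces() is
only a placeholder for the existing logic in nmi_trigger_cpumask_backtrace(),
not a real kernel function.

	/*
	 * Sketch only: how the snapshot/check hooks are intended to
	 * bracket a backtrace request.  send_and_wait_for_backtraces()
	 * is illustrative; the real call sites are in
	 * lib/nmi_backtrace.c at the end of this diff.
	 */
	static void backtrace_with_stall_diagnostics(const struct cpumask *mask)
	{
		nmi_backtrace_stall_snap(mask);		/* record per-CPU progress counters */
		send_and_wait_for_backtraces(mask);	/* placeholder: raise NMIs, wait for replies */
		nmi_backtrace_stall_check(mask);	/* report why stragglers never responded */
	}
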
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3bc04f..c315b18ec7c87 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -69,6 +69,15 @@ struct nmi_stats {
unsigned int unknown;
unsigned int external;
unsigned int swallow;
+ unsigned long recv_jiffies;
+ unsigned long idt_seq;
+ unsigned long idt_nmi_seq;
+ unsigned long idt_ignored;
+ atomic_long_t idt_calls;
+ unsigned long idt_seq_snap;
+ unsigned long idt_nmi_seq_snap;
+ unsigned long idt_ignored_snap;
+ long idt_calls_snap;
};
static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
@@ -479,12 +488,15 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
DEFINE_IDTENTRY_RAW(exc_nmi)
{
irqentry_state_t irq_state;
+ struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);
/*
* Re-enable NMIs right here when running as an SEV-ES guest. This might
* cause nested NMIs, but those can be handled safely.
*/
sev_es_nmi_complete();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
+ arch_atomic_long_inc(&nsp->idt_calls);
if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
return;
@@ -495,6 +507,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
}
this_cpu_write(nmi_state, NMI_EXECUTING);
this_cpu_write(nmi_cr2, read_cr2());
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
nmi_restart:
/*
@@ -509,8 +526,19 @@ nmi_restart:
inc_irq_stat(__nmi_count);
- if (!ignore_nmis)
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
+ WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
+ } else if (!ignore_nmis) {
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
+ }
default_do_nmi(regs);
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+ WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
+ }
+ }
irqentry_nmi_exit(regs, irq_state);
@@ -525,6 +553,11 @@ nmi_restart:
if (user_mode(regs))
mds_user_clear_cpu_buffers();
+ if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+ WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+ WARN_ON_ONCE(nsp->idt_seq & 0x1);
+ WRITE_ONCE(nsp->recv_jiffies, jiffies);
+ }
}
#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
@@ -537,6 +570,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
#endif
+#ifdef CONFIG_NMI_CHECK_CPU
+
+static char *nmi_check_stall_msg[] = {
+/* */
+/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */
+/* | +------ cpu_is_offline(cpu) */
+/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */
+/* | | | NMI handler has been invoked. */
+/* | | | */
+/* V V V */
+/* 0 0 0 */ "NMIs are not reaching exc_nmi() handler",
+/* 0 0 1 */ "exc_nmi() handler is ignoring NMIs",
+/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi() handler",
+/* 0 1 1 */ "CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
+/* 1 0 0 */ "CPU is in exc_nmi() handler and no further NMIs are reaching handler",
+/* 1 0 1 */ "CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
+/* 1 1 0 */ "CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
+/* 1 1 1 */ "CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
+};
+
+void nmi_backtrace_stall_snap(const struct cpumask *btp)
+{
+ int cpu;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
+ nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
+ nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
+ nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
+ }
+}
+
+void nmi_backtrace_stall_check(const struct cpumask *btp)
+{
+ int cpu;
+ int idx;
+ unsigned long nmi_seq;
+ unsigned long j = jiffies;
+ char *modp;
+ char *msgp;
+ char *msghp;
+ struct nmi_stats *nsp;
+
+ for_each_cpu(cpu, btp) {
+ nsp = per_cpu_ptr(&nmi_stats, cpu);
+ modp = "";
+ msghp = "";
+ nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
+ if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
+ msgp = "CPU entered NMI handler function, but has not exited";
+ } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
+ msgp = "CPU is handling NMIs";
+ } else {
+ idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+ (cpu_is_offline(cpu) << 1) |
+ (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
+ msgp = nmi_check_stall_msg[idx];
+ if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
+ modp = ", but OK because ignore_nmis was set";
+ if (nmi_seq & ~0x1)
+ msghp = " (CPU currently in NMI handler function)";
+ else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+ msghp = " (CPU exited one NMI handler function)";
+ }
+ pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
+ __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+ }
+}
+
+#endif
+
void stop_nmi(void)
{
ignore_nmis++;
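
A worked example of the three-bit index that nmi_backtrace_stall_check()
uses to pick an entry from the nmi_check_stall_msg[] table above; this is a
self-contained user-space illustration in which the sampled values are
invented, and only the bit encoding and the message strings come from the
patch.

	#include <stdbool.h>
	#include <stdio.h>

	/* Messages copied from the nmi_check_stall_msg[] table above. */
	static const char *const stall_msg[] = {
		"NMIs are not reaching exc_nmi() handler",
		"exc_nmi() handler is ignoring NMIs",
		"CPU is offline and NMIs are not reaching exc_nmi() handler",
		"CPU is offline and exc_nmi() handler is legitimately ignoring NMIs",
		"CPU is in exc_nmi() handler and no further NMIs are reaching handler",
		"CPU is in exc_nmi() handler which is legitimately ignoring NMIs",
		"CPU is offline in exc_nmi() handler and no more NMIs are reaching exc_nmi() handler",
		"CPU is offline in exc_nmi() handler which is legitimately ignoring NMIs",
	};

	int main(void)
	{
		/* Hypothetical snapshot for one stalled CPU. */
		unsigned long idt_seq_snap = 42;  /* even: CPU was not in exc_nmi() at snapshot time */
		bool offline = false;             /* cpu_is_offline(cpu) */
		bool calls_advanced = true;       /* idt_calls moved since the snapshot */

		/* Same encoding as nmi_backtrace_stall_check() above. */
		int idx = ((idt_seq_snap & 0x1) << 2) | (offline << 1) | calls_advanced;

		/* Prints: idx=1: exc_nmi() handler is ignoring NMIs */
		printf("idx=%d: %s\n", idx, stall_msg[idx]);
		return 0;
	}
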
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index f700ff2df074e..048c0b9aa623d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
#include <asm/nmi.h>
#endif
+#ifdef CONFIG_NMI_CHECK_CPU
+void nmi_backtrace_stall_snap(const struct cpumask *btp);
+void nmi_backtrace_stall_check(const struct cpumask *btp);
+#else
+static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {}
+static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {}
+#endif
+
#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cecae75d7519d..5a69b3805b1ce 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1562,6 +1562,17 @@ config TRACE_IRQFLAGS_NMI
depends on TRACE_IRQFLAGS
depends on TRACE_IRQFLAGS_NMI_SUPPORT
+config NMI_CHECK_CPU
+ bool "Debugging for CPUs failing to respond to backtrace requests"
+ depends on DEBUG_KERNEL
+ depends on X86
+ default n
+ help
+ Enables debug prints when a CPU fails to respond to a given
+ backtrace NMI. These prints provide some reasons why a CPU
+ might legitimately be failing to respond, for example, if it
+ is offline or if ignore_nmis is set.
+
config DEBUG_IRQFLAGS
bool "Debug IRQ flag manipulation"
help
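
One design note on how the new Kconfig symbol is consumed: the statistics
updates in exc_nmi() above are wrapped in IS_ENABLED(CONFIG_NMI_CHECK_CPU)
rather than #ifdef blocks, so the diagnostic code is always parsed and
type-checked but compiles away to nothing when the option is disabled. A
minimal sketch of that pattern, in which my_event_count and my_handler()
are illustrative names rather than kernel symbols:

	#include <linux/kconfig.h>	/* IS_ENABLED() */

	static unsigned long my_event_count;	/* illustrative counter, not a kernel symbol */

	static void my_handler(void)
	{
		/*
		 * IS_ENABLED() folds to a compile-time 0 or 1, so with
		 * CONFIG_NMI_CHECK_CPU=n the branch below is dropped by the
		 * compiler while still being checked for validity.
		 */
		if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
			my_event_count++;
	}
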
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index d01aec6ae15c8..5274bbb026d79 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
+ nmi_backtrace_stall_snap(to_cpumask(backtrace_mask));
raise(to_cpumask(backtrace_mask));
}
@@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
mdelay(1);
touch_softlockup_watchdog();
}
+ nmi_backtrace_stall_check(to_cpumask(backtrace_mask));
/*
* Force flush any remote buffers that might be stuck in IRQ context