aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-02-21 08:04:51 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-02-21 08:04:51 -0800
commit: 0246725d7399d7d6acc8fd5a1a0a1ffce9a1eaa3 (patch)
tree: 6571a85737da30030d804e7eab4243d79ea91e43
parent: 89f5349e0673322857bd432fa23113af56673739 (diff)
parent: 8a01ec97dc066009dd89e43bfcf55644f2dd6d19 (diff)
download: linux-0246725d7399d7d6acc8fd5a1a0a1ffce9a1eaa3.tar.gz
Merge tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov:

 - Add support for reporting more bits of the physical address on error, on newer AMD CPUs

 - Mask out bits which don't belong to the address of the error being reported

* tag 'ras_core_for_v6.3_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Mask out non-address bits from machine check bank
  x86/mce: Add support for Extended Physical Address MCA changes
  x86/mce: Define a function to extract ErrorAddr from MCA_ADDR
-rw-r--r-- arch/x86/include/asm/mce.h | 3
-rw-r--r-- arch/x86/kernel/cpu/mce/amd.c | 12
-rw-r--r-- arch/x86/kernel/cpu/mce/core.c | 32
-rw-r--r-- arch/x86/kernel/cpu/mce/internal.h | 44
-rw-r--r-- drivers/edac/skx_common.c | 2
5 files changed, 62 insertions, 31 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6e986088817d51..9646ed6e8c0b95 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -88,6 +88,9 @@
#define MCI_MISC_ADDR_MEM 3 /* memory address */
#define MCI_MISC_ADDR_GENERIC 7 /* generic */
+/* MCi_ADDR register defines */
+#define MCI_ADDR_PHYSADDR GENMASK_ULL(boot_cpu_data.x86_phys_bits - 1, 0)
+
/* CTL2 register defines */
#define MCI_CTL2_CMCI_EN BIT_ULL(30)
#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 10fb5b5c9efa4c..23c5072fbbb76d 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -306,6 +306,8 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
if ((low & BIT(5)) && !((high >> 5) & 0x3))
high |= BIT(5);
+ this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
+
wrmsr(smca_config, low, high);
}
@@ -736,15 +738,7 @@ static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
if (m.status & MCI_STATUS_ADDRV) {
m.addr = addr;
- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
- if (mce_flags.smca) {
- u8 lsb = (m.addr >> 56) & 0x3f;
-
- m.addr &= GENMASK_ULL(55, lsb);
- }
+ smca_extract_err_addr(&m);
}
if (mce_flags.smca) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2c8ec5c7171219..7832a69d170e72 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,13 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
-struct mce_bank {
- u64 ctl; /* subevents to enable */
-
- __u64 init : 1, /* initialise bank? */
- __reserved_1 : 63;
-};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
#define ATTR_LEN 16
/* One object for each MCE bank, shared by all CPUs */
@@ -579,7 +573,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
mce->severity != MCE_DEFERRED_SEVERITY)
return NOTIFY_DONE;
- pfn = mce->addr >> PAGE_SHIFT;
+ pfn = (mce->addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
if (!memory_failure(pfn, 0)) {
set_mce_nospec(pfn);
mce->kflags |= MCE_HANDLED_UC;
@@ -633,15 +627,7 @@ static noinstr void mce_read_aux(struct mce *m, int i)
m->addr <<= shift;
}
- /*
- * Extract [55:<lsb>] where lsb is the least significant
- * *valid* bit of the address bits.
- */
- if (mce_flags.smca) {
- u8 lsb = (m->addr >> 56) & 0x3f;
-
- m->addr &= GENMASK_ULL(55, lsb);
- }
+ smca_extract_err_addr(m);
}
if (mce_flags.smca) {
@@ -1308,6 +1294,7 @@ static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
int flags = MF_ACTION_REQUIRED;
+ unsigned long pfn;
int ret;
p->mce_count = 0;
@@ -1316,9 +1303,10 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- ret = memory_failure(p->mce_addr >> PAGE_SHIFT, flags);
+ pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
+ ret = memory_failure(pfn, flags);
if (!ret) {
- set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
+ set_mce_nospec(pfn);
sync_core();
return;
}
@@ -1340,11 +1328,13 @@ static void kill_me_maybe(struct callback_head *cb)
static void kill_me_never(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
+ unsigned long pfn;
p->mce_count = 0;
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
- set_mce_nospec(p->mce_addr >> PAGE_SHIFT);
+ pfn = (p->mce_addr & MCI_ADDR_PHYSADDR) >> PAGE_SHIFT;
+ if (!memory_failure(pfn, 0))
+ set_mce_nospec(pfn);
}
static void queue_task_work(struct mce *m, char *msg, void (*func)(struct callback_head *))
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 7e03f5b7f6bd7d..91a415553c2721 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -177,6 +177,24 @@ struct mce_vendor_flags {
extern struct mce_vendor_flags mce_flags;
+struct mce_bank {
+ /* subevents to enable */
+ u64 ctl;
+
+ /* initialise bank? */
+ __u64 init : 1,
+
+ /*
+ * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: When set, this bit indicates
+ * the LSB field is found in MCA_STATUS and not in MCA_ADDR.
+ */
+ lsb_in_status : 1,
+
+ __reserved_1 : 62;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
enum mca_msr {
MCA_CTL,
MCA_STATUS,
@@ -189,8 +207,34 @@ extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
extern bool amd_filter_mce(struct mce *m);
+
+/*
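+ * ErrAddr is always extracted from m->addr (MCA_ADDR). If
+ * MCA_CONFIG[McaLsbInStatusSupported] is set, keep bits [56:lsb] with lsb taken
+ * from MCA_STATUS[29:24]; otherwise keep bits [55:lsb] with lsb taken from
+ * MCA_ADDR[61:56].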
+ */
+static __always_inline void smca_extract_err_addr(struct mce *m)
+{
+ u8 lsb;
+
+ if (!mce_flags.smca)
+ return;
+
+ if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
+ lsb = (m->status >> 24) & 0x3f;
+
+ m->addr &= GENMASK_ULL(56, lsb);
+
+ return;
+ }
+
+ lsb = (m->addr >> 56) & 0x3f;
+
+ m->addr &= GENMASK_ULL(55, lsb);
+}
+
#else
static inline bool amd_filter_mce(struct mce *m) { return false; }
+static inline void smca_extract_err_addr(struct mce *m) { }
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index f0f8e98f6efb28..806986f0317773 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -657,7 +657,7 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
memset(&res, 0, sizeof(res));
res.mce = mce;
- res.addr = mce->addr;
+ res.addr = mce->addr & MCI_ADDR_PHYSADDR;
/* Try driver decoder first */
if (!(driver_decode && driver_decode(&res))) {