diff options
author | Tony Luck <tony.luck@intel.com> | 2022-10-13 10:00:46 -0700 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2022-10-13 10:00:46 -0700 |
commit | 7424ca7e42b45115462d8c9bfe89364c3ec33d2b (patch) | |
tree | 04d7af01bdf5ec73bbf451771f9e24162ba35c0a | |
parent | bd845af6ae49d614be3f5ed4c2915ac5c1ffbea7 (diff) | |
parent | eada934874aa845d92a2782c21d820ff27488060 (diff) | |
download | ras-tools-7424ca7e42b45115462d8c9bfe89364c3ec33d2b.tar.gz |
Merge from https://gitee.com/anolis/ras-tools.git
Lots of bug fixes & cleanups. Plus ARM support!
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | do_memcpy.S | 20 | ||||
-rw-r--r-- | einj_mem_uc.c | 484 | ||||
-rw-r--r-- | memattr/LICENSE | 339 | ||||
-rw-r--r-- | memattr/Makefile | 10 | ||||
-rw-r--r-- | memattr/README | 56 | ||||
-rw-r--r-- | memattr/pgprot_drv.c | 188 | ||||
-rw-r--r-- | memattr/test.c | 223 | ||||
-rw-r--r-- | ras-tolerance/Makefile | 11 | ||||
-rw-r--r-- | ras-tolerance/README | 9 | ||||
-rw-r--r-- | ras-tolerance/ras_tolerance.c | 246 |
12 files changed, 1565 insertions, 26 deletions
@@ -5,4 +5,7 @@ hornet lmce mca-recover vtop +.vscode +modules.order +*.cmd @@ -23,7 +23,7 @@ hornet: hornet.c cc -o hornet $(CFLAGS) hornet.c einj_mem_uc: einj_mem_uc.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o - cc -o einj_mem_uc einj_mem_uc.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o + cc -o einj_mem_uc einj_mem_uc.o proc_cpuinfo.o proc_interrupt.o proc_pagemap.o do_memcpy.o -pthread lmce: proc_pagemap.o lmce.o cc -o lmce proc_pagemap.o lmce.o -pthread diff --git a/do_memcpy.S b/do_memcpy.S index d04f28b..fb37f20 100644 --- a/do_memcpy.S +++ b/do_memcpy.S @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#ifdef __x86_64__ .globl do_memcpy .type do_memcpy, @function do_memcpy: @@ -10,3 +11,22 @@ do_memcpy: retq .cfi_endproc .size do_memcpy, .-do_memcpy + +#elif __aarch64__ +dst .req x0 +src .req x1 +count .req x2 +tmp .req w3 + + + .globl do_memcpy +do_memcpy: + cbz count, .Lexit + ldrb tmp, [src], #0x1 + strb tmp, [dst], #0x1 + sub count, count, #0x1 + b.ge do_memcpy +.Lexit: + ret +#endif + diff --git a/einj_mem_uc.c b/einj_mem_uc.c index 14c55f6..90cf5c9 100644 --- a/einj_mem_uc.c +++ b/einj_mem_uc.c @@ -25,6 +25,11 @@ #include <errno.h> #include <sys/syscall.h> #include <linux/futex.h> +#include <pthread.h> + +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 +#endif extern long long vtop(long long); extern void proc_cpuinfo(int *nsockets, int *ncpus, char *model, int *modelnum, int **apicmap); @@ -50,6 +55,22 @@ static int *apicmap; #define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" #define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" #define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" +#define EINJ_VENDOR "/sys/kernel/debug/apei/einj/vendor" + +/* + * Vendor extensions for platform specific operations + */ +struct vendor_error_type_extension { + int32_t length; + int32_t pcie_sbdf; + int16_t vendor_id; + int16_t device_id; + int8_t rev_id; + int8_t reserved[3]; +}; + +#define PRINT_INJECTING 
printf("injecting ...\n") +#define PRINT_TRIGGERING printf("triggering ...\n") static int check_errortype_available(char *file, unsigned long long val) { @@ -99,6 +120,8 @@ static void wfile(char *file, unsigned long long val) static void inject_uc(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + if (Sflag) { vaddr = (void *)((long)vaddr & ~(pagesize - 1)); madvise(vaddr, pagesize, MADV_HWPOISON); @@ -113,10 +136,51 @@ static void inject_uc(unsigned long long addr, void *vaddr, int notrigger) wfile(EINJ_DOIT, 1); } +static void inject_core_ce(unsigned long long addr, void *vaddr, int notrigger) +{ + unsigned int cpu; + + PRINT_INJECTING; + cpu = sched_getcpu(); + wfile(EINJ_ETYPE, 0x1); + wfile(EINJ_APIC, cpu); + wfile(EINJ_FLAGS, 1); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_core_non_fatal(unsigned long long addr, void *vaddr, int notrigger) +{ + unsigned int cpu; + + PRINT_INJECTING; + cpu = sched_getcpu(); + wfile(EINJ_ETYPE, 0x2); + wfile(EINJ_APIC, cpu); + wfile(EINJ_FLAGS, 1); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_core_fatal(unsigned long long addr, void *vaddr, int notrigger) +{ + unsigned int cpu; + + PRINT_INJECTING; + cpu = sched_getcpu(); + wfile(EINJ_ETYPE, 0x4); + wfile(EINJ_APIC, cpu); + wfile(EINJ_FLAGS, 1); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +#ifdef __x86_64__ static void inject_llc(unsigned long long addr, void *vaddr, int notrigger) { unsigned cpu; + PRINT_INJECTING; cpu = sched_getcpu(); wfile(EINJ_ETYPE, 0x2); wfile(EINJ_ADDR, addr); @@ -126,7 +190,123 @@ static void inject_llc(unsigned long long addr, void *vaddr, int notrigger) wfile(EINJ_NOTRIGGER, notrigger); wfile(EINJ_DOIT, 1); } +#elif __aarch64__ +static void inject_llc(unsigned long long addr, void *vaddr, int notrigger) +{ + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x400); + wfile(EINJ_MASK, 0x01); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, 
notrigger); + wfile(EINJ_DOIT, 1); +} + + +static void inject_cmn_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x800); + wfile(EINJ_MASK, 0x01); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_gic_ce(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x200); + wfile(EINJ_MASK, 0x02); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_gic_non_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x400); + wfile(EINJ_MASK, 0x02); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_smmu_tcu_ce(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x200); + wfile(EINJ_MASK, 0x03); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_smmu_tcu_non_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x400); + wfile(EINJ_MASK, 0x03); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_smmu_tcu_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x800); + wfile(EINJ_MASK, 0x03); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_smmu_tbu_ce(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x200); + wfile(EINJ_MASK, 0x04); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} +static void inject_smmu_tbu_non_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x400); + wfile(EINJ_MASK, 0x04); + wfile(EINJ_FLAGS, 0x01); + 
wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} + +static void inject_smmu_tbu_fatal(unsigned long long addr, void *vaddr, int notrigger) { + PRINT_INJECTING; + wfile(EINJ_ETYPE, 0x800); + wfile(EINJ_MASK, 0x04); + wfile(EINJ_FLAGS, 0x01); + wfile(EINJ_NOTRIGGER, notrigger); + wfile(EINJ_DOIT, 1); +} +#endif + +static int is_privileged(void) +{ + if (getuid() != 0) { + fprintf(stderr, "%s: must be root to run error injection tests\n", progname); + return 0; + } + return 1; +} + +static int is_einj_support(void) +{ + if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) { + fprintf(stderr, "%s: Error injection not supported, check your BIOS settings\n", progname); + return 0; + } + if (access(EINJ_NOTRIGGER, R_OK|W_OK) == -1) { + fprintf(stderr, "%s: Is the einj.ko module loaded?\n", progname); + return 0; + } + return 1; +} + +#ifdef __x86_64__ static int is_advanced_ras(char *model, int modelnum) { switch (modelnum) { @@ -148,18 +328,9 @@ static void check_configuration(void) char model[512]; int modelnum; - if (getuid() != 0) { - fprintf(stderr, "%s: must be root to run error injection tests\n", progname); - exit(1); - } - if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1) { - fprintf(stderr, "%s: Error injection not supported, check your BIOS settings\n", progname); + if (!is_privileged() || !is_einj_support()) exit(1); - } - if (access(EINJ_NOTRIGGER, R_OK|W_OK) == -1) { - fprintf(stderr, "%s: Is the einj.ko module loaded?\n", progname); - exit(1); - } + model[0] = '\0'; proc_cpuinfo(&nsockets, &ncpus, model, &modelnum, &apicmap); if (nsockets == 0 || ncpus == 0) { @@ -176,6 +347,38 @@ static void check_configuration(void) exit(1); } } +#elif __aarch64__ + +static int is_advanced_ras(void) +{ + FILE *fp = fopen(EINJ_VENDOR, "r"); + struct vendor_error_type_extension v; + int8_t domain, bus, dev, func; + int ret; + + ret = fscanf(fp, "%x:%x:%x.%x vendor_id=%x device_id=%x rev_id=%x\n", + &domain, &bus, &dev, &func, + &v.vendor_id, 
&v.device_id, &v.rev_id); + + if (ret != 7) + exit(1); + + switch (v.vendor_id) { + case 0x1ded: + return 1; + default: + return 0; + } +} + +static void check_configuration(void) +{ + if (!is_privileged() || !is_einj_support()) + exit(1); + if (!is_advanced_ras()) + exit(1); +} +#endif #define REP9(stmt) stmt;stmt;stmt;stmt;stmt;stmt;stmt;stmt;stmt @@ -205,6 +408,41 @@ static void *thp_data_alloc(void) return p + MB(64); } +int get_huge_pagesize(void) +{ + FILE *fp; + char *line = NULL; + size_t linelen = 0; + int hpagesize = 0; + if ((fp = fopen("/proc/meminfo", "r")) == NULL) { + fprintf(stderr, "open /proc/meminfo"); + exit(1); + } + while (getline(&line, &linelen, fp) > 0) { + if (sscanf(line, "Hugepagesize: %d kB", &hpagesize) >= 1) + break; + } + free(line); + fclose(fp); + return hpagesize * 1024; +} + +static void *hugetlb_alloc(void) +{ + int HPS = get_huge_pagesize(); + char *p = mmap(NULL, HPS, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON|MAP_HUGETLB, -1, 0); + int i; + + if (p == NULL) { + fprintf(stderr, "%s: cannot allocate memory\n", progname); + exit(1); + } + srandom(getpid() * time(NULL)); + for (i = 0; i < HPS; i++) + p[i] = random(); + return p + HPS / 4; +} + static void *data_alloc(void) { char *p = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANON, -1, 0); @@ -275,13 +513,22 @@ static void *instr_alloc(void) return (void *)((long)p & ~(pagesize - 1)); } +/* + * parameters to the single and write tests. 
+ */ +int trigger_offset = 0; /* where to hit after the poison addr */ + int trigger_single(char *addr) { - return addr[0]; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + return target[0]; } int trigger_double(char *addr) { + PRINT_TRIGGERING; return addr[0] + addr[1]; } @@ -289,15 +536,98 @@ int trigger_split(char *addr) { long *a = (long *)(addr - 1); + PRINT_TRIGGERING; return a[0]; } int trigger_write(char *addr) { - addr[0] = 'a'; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + target[0] = 'a'; + return 0; +} + +#ifdef __aarch64__ +#define __put_mem_asm(store, reg, x, addr) \ + asm volatile( \ + store " " reg "0, [%1]\n" \ + : \ + : "r" (x), "r" (addr)) + +int trigger_write_byte(char *addr) +{ + int8_t __pu_val = 0x1E; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + __put_mem_asm("strb", "%w", __pu_val, target); + + return 0; +} + +int trigger_write_word(char *addr) +{ + int16_t __pu_val = 0x1EFF; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + __put_mem_asm("strh", "%w", __pu_val, target); + return 0; } +int trigger_write_dword(char *addr) +{ + int32_t __pu_val = 0x1FFFEEEE; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + __put_mem_asm("str", "%w", __pu_val, target); + return 0; +} + +int trigger_write_qword(char *addr) +{ + int64_t __pu_val = 0x1EEEFFFFFEEEE; + char *target = addr + trigger_offset; + + PRINT_TRIGGERING; + __put_mem_asm("str", "%x", __pu_val, target); + return 0; +} +#endif + +int thread(char *addr) +{ + printf(">> trigger_thread\n"); + + return addr[0]; +} + +int trigger_thread(char *addr) +{ + unsigned long ret; + pthread_t id1, id2; + + ret = pthread_create(&id1, NULL, (void*)thread, addr); + if (ret != 0) { + printf("create pthread error\n"); + exit(1); + } + + ret = pthread_create(&id2, NULL, (void*)thread, addr); + if (ret != 0) { + printf("create pthread error\n"); + exit(1); + } + + pthread_join(id1, NULL); + pthread_join(id2, NULL); + + return 0; +} /* * 
parameters to the memcpy and copyin tests. */ @@ -342,6 +672,7 @@ int trigger_memcpy(char *addr) char *src = addr - memcpy_runup; char *dst = addr + pagesize / 2; + PRINT_TRIGGERING; dst -= memcpy_align; do_memcpy(dst, src, memcpy_size); return 0; @@ -359,6 +690,7 @@ int trigger_copyin(char *addr) return -1; } (void)unlink(filename); + PRINT_TRIGGERING; if ((ret = write(copyin_fd, addr - memcpy_runup, memcpy_size)) != memcpy_size) { if (ret == -1) fprintf(stderr, "%s: couldn't write temp file (errno=%d)\n", progname, errno); @@ -379,6 +711,7 @@ int trigger_copyout(char *addr) return -1; } rewind(pcfile); + PRINT_TRIGGERING; ret = fread(buf, 1, pagesize, pcfile); fprintf(stderr, "%s: read returned %d\n", progname); @@ -387,17 +720,29 @@ int trigger_copyout(char *addr) int trigger_patrol(char *addr) { + PRINT_TRIGGERING; sleep(1); } +#ifdef __x86_64__ int trigger_llc(char *addr) { + PRINT_TRIGGERING; asm volatile("clflush %0" : "+m" (*addr)); } +#elif __aarch64__ +int trigger_llc(char *addr) +{ + asm volatile("dc civac, %0" : : "r" (addr) : "memory"); +} +#endif int trigger_instr(char *addr) { - int ret = dosums(); + int ret; + + PRINT_TRIGGERING; + ret = dosums(); if (ret != 729) printf("Corruption during instruction fault recovery (%d)\n", ret); @@ -415,6 +760,7 @@ int trigger_futex(char *addr) { int ret; + PRINT_TRIGGERING; ret = futex((int *)addr, FUTEX_WAIT, 0, NULL, NULL, 0); if (ret == -1) printf("futex returned with errno=%d\n", errno); @@ -458,9 +804,71 @@ struct test { thp_data_alloc, inject_uc, 1, trigger_single, F_MCE|F_CMCI|F_SIGBUS, }, { + "hugetlb", "Try to inject in hugetlb page, generates SRAR machine check", + hugetlb_alloc, inject_uc, 1, trigger_single, F_MCE|F_CMCI|F_SIGBUS, + }, + { "store", "Write to target address. Should generate a UCNA/CMCI", data_alloc, inject_uc, 1, trigger_write, F_CMCI, }, +#ifdef __aarch64__ + { + "cmn_non_fatal", "CMN SLC Data RAM DE. 
Should generate a UCNA/CMCI", + data_alloc, inject_llc, 1, trigger_single, F_CMCI, + }, + { + "cmn_fatal", "CMN SLC Data RAM UE. Should fatal", + data_alloc, inject_cmn_fatal, 1, trigger_single, F_FATAL, + }, + { + "gic_ce", "GIC corrected error. Should generate a CMCI", + data_alloc, inject_gic_ce, 1, trigger_single, F_CMCI, + }, + { + "gic_non_fatal", "GIC deferred error", + data_alloc, inject_gic_non_fatal, 1, trigger_single, F_CMCI, + }, + { + "smmu_tcu_ce", "SMMU TCU corrected error. Should generate a UCNA/CMCI", + data_alloc, inject_smmu_tcu_ce, 1, trigger_single, F_CMCI, + }, + { + "smmu_tcu_non_fatal", "SMMU TCU deferred error. Should generate a UCNA/CMCI", + data_alloc, inject_smmu_tcu_non_fatal, 1, trigger_single, F_CMCI, + }, + { + "smmu_tcu_fatal", "SMMU TCU uncorrected error. Should fatal", + data_alloc, inject_smmu_tcu_fatal, 1, trigger_single, F_FATAL, + }, + { + "smmu_tbu_ce", "SMMU TBU corrected error. Should generate a UCNA/CMCI", + data_alloc, inject_smmu_tbu_ce, 1, trigger_single, F_CMCI, + }, + { + "smmu_tbu_non_fatal", "SMMU TBU deferred error. Should generate a UCNA/CMCI", + data_alloc, inject_smmu_tbu_non_fatal, 1, trigger_single, F_CMCI, + }, + { + "smmu_tbu_fatal", "SMMU TBU uncorrected error. Should fatal", + data_alloc, inject_smmu_tbu_fatal, 1, trigger_single, F_FATAL, + }, + { + "strbyte", "Write to target address. Should generate a UCNA/CMCI", + data_alloc, inject_uc, 1, trigger_write_byte, F_CMCI, + }, + { + "strword", "Write to target address. Should generate a UCNA/CMCI", + data_alloc, inject_uc, 1, trigger_write_word, F_CMCI, + }, + { + "strdword", "Write to target address. Should generate a UCNA/CMCI", + data_alloc, inject_uc, 1, trigger_write_dword, F_CMCI, + }, + { + "strqword", "Write to target address. Should generate a UCNA/CMCI", + data_alloc, inject_uc, 1, trigger_write_qword, F_CMCI, + }, +#endif { "memcpy", "Streaming read from target address. 
Probably fatal", data_alloc, inject_uc, 1, trigger_memcpy, F_MCE|F_CMCI|F_SIGBUS|F_FATAL, @@ -474,6 +882,10 @@ struct test { data_alloc, inject_uc, 0, trigger_patrol, F_EITHER|F_LONGWAIT, }, { + "thread", "Single read by two threads to target address at the same time, generates SRAR machine check", + data_alloc, inject_uc, 1, trigger_thread, F_MCE|F_CMCI|F_SIGBUS, + }, + { "llc", "Cache write-back, generates SRAO machine check", data_alloc, inject_llc, 1, trigger_llc, F_MCE, }, @@ -493,6 +905,18 @@ struct test { "mlock", "mlock target page then inject/read to generates SRAR machine check", mlock_data_alloc, inject_uc, 1, trigger_single, F_MCE|F_CMCI|F_SIGBUS, }, + { + "core_ce", "Core corrected error", + data_alloc, inject_core_ce, 1, trigger_single, F_CMCI, + }, + { + "core_non_fatal", "Core deferred error", + data_alloc, inject_core_non_fatal, 1, trigger_single, F_CMCI, + }, + { + "core_fatal", "Core uncorrected error. Should fatal", + data_alloc, inject_core_fatal, 1, trigger_single, F_CMCI|F_FATAL, + }, { NULL } }; @@ -532,7 +956,7 @@ static jmp_buf env; static void recover(int sig, siginfo_t *si, void *v) { - printf("SIGBUS: addr = %p\n", si->si_addr); + printf("signal %d code %d addr %p\n", sig, si->si_code, si->si_addr); siglongjmp(env, 1); } @@ -544,19 +968,22 @@ struct sigaction recover_act = { int main(int argc, char **argv) { int c, i; - int count = 1, cmci_wait_count = 0; + int count = 1; double delay = 1.0; struct test *t; void *vaddr; long long paddr; +#ifdef __x86_64__ + int cmci_wait_count = 0; + int either; long b_mce, b_cmci, a_mce, a_cmci; struct timeval t1, t2; - int either; +#endif progname = argv[0]; pagesize = getpagesize(); - while ((c = getopt(argc, argv, "ac:d:fhim:S")) != -1) switch (c) { + while ((c = getopt(argc, argv, "ac:d:fhim:z:S")) != -1) switch (c) { case 'a': all_flag = 1; break; @@ -575,6 +1002,9 @@ int main(int argc, char **argv) case 'm': parse_memcpy(optarg); break; + case 'z': + trigger_offset = strtod(optarg, NULL); + 
break; case 'S': Sflag = 1; break; @@ -583,7 +1013,8 @@ int main(int argc, char **argv) break; } - check_configuration(); + if (Sflag == 0) + check_configuration(); if (optind < argc) t = lookup_test(argv[optind]); @@ -598,20 +1029,22 @@ int main(int argc, char **argv) sigaction(SIGBUS, &recover_act, NULL); for (i = 0; i < count; i++) { - cmci_wait_count = 0; - either = 0; vaddr = t->alloc(); paddr = vtop((long long)vaddr); printf("%d: %-8s vaddr = %p paddr = %llx\n", i, t->testname, vaddr, paddr); - +#ifdef __x86_64__ + cmci_wait_count = 0; + either = 0; proc_interrupts(&b_mce, &b_cmci); gettimeofday(&t1, NULL); +#endif if (sigsetjmp(env, 1)) { if ((t->flags & F_SIGBUS) == 0) { printf("Unexpected SIGBUS\n"); } } else { t->inject(paddr, vaddr, t->notrigger); + sleep(3); t->trigger(vaddr); if (t->flags & F_SIGBUS) { printf("Expected SIGBUS, didn't get one\n"); @@ -636,13 +1069,13 @@ int main(int argc, char **argv) /* Give system a chance to process on possibly deep C-state idle cpus */ usleep(100); - +#ifdef __x86_64__ proc_interrupts(&a_mce, &a_cmci); - +#endif if (t->flags & F_FATAL) { printf("Big surprise ... still running. Thought that would be fatal\n"); } - +#ifdef __x86_64__ if (Sflag == 0 && (t->flags & (F_MCE | F_EITHER))) { if (a_mce == b_mce) { if (t->flags & F_EITHER) @@ -718,6 +1151,7 @@ skip2: while (t->flags & F_FATAL) t = next_test(t); } +#endif } printf("Test passed\n"); diff --git a/memattr/LICENSE b/memattr/LICENSE new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/memattr/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/memattr/Makefile b/memattr/Makefile new file mode 100644 index 0000000..a5ff968 --- /dev/null +++ b/memattr/Makefile @@ -0,0 +1,10 @@ +ifneq ($(KERNELRELEASE),) + obj-m:=pgprot_drv.o +else + KDIR := /lib/modules/$(shell uname -r)/build/ +PWD:=$(shell pwd) +all: + make -C $(KDIR) M=$(PWD) modules +clean: + rm -f *.ko *.o *.symvers *.cmd *.cmd.o +endif diff --git a/memattr/README b/memattr/README new file mode 100644 index 0000000..e4022d5 --- /dev/null +++ b/memattr/README @@ -0,0 +1,56 @@ +pgprot_drv + +Example of Linux kernel driver that allows a user-space program to mmap a +buffer of contiguous physical memory with specific memory attribute. + +Usage: + +1. Compile the driver and test case + + make + gcc -o test test.c + +2. Install the module with explicit memory attribute + + insmod pgprot_drv.ko pgprot=0 + +The remaped page provided by this driver is Normal Memory by default. If you +prefer other attribute, e.g. 
MT_DEVICE_nGnRnE, change pgprot=4 and reload this +module by: + + rmmod pgprot_drv.ko + insmod pgprot_drv.ko pgprot=4 + +Note: memory attribute index may vary with different kernel version. + +3. Then you will get the remaped physical address which memory attribute is +reset. Run user-level test case with -b to bench performance. When the memory +attribute is normal memory (pgprot=0): + + #./test -b + check ctx: vaddr = 0xffff8b12d000, num 56, paddr 8d3512000 + Buffer size: 4 KB, stride 128, time 0.000095 s, latency 1.45 ns + +While if set the memory attribute as MT_DEVICE_nGnRnE (pgprot=4): + + #./test -b + check ctx: vaddr = 0xffff8652f000, num 56, paddr 8a2af2000 + Buffer size: 4 KB, stride 128, time 0.008787 s, latency 134.15 ns + + +Run user-level test case with -p to poison. + + modprobe einj + ./test -p + +Check dmesg with PTE value: + + [20049.964983] vma->vm_start = ffff8b12d000 + [20049.964985] Memory Atrr: MT_NORMAL + [20049.972303] [0000ffff8b12d000] pgd=00000008959b1003 + [20049.972304] , p4d=00000008959b1003 + [20049.977169] , pud=00000008d06ae003 + [20049.980559] , pmd=000000089cfb9003 + [20049.983949] , pte=01680008d3512fc3 + [20049.990725] , pte present +
\ No newline at end of file diff --git a/memattr/pgprot_drv.c b/memattr/pgprot_drv.c new file mode 100644 index 0000000..fe464e3 --- /dev/null +++ b/memattr/pgprot_drv.c @@ -0,0 +1,188 @@ +/* + * Copyright (C) 2022 Alibaba Corporation + * Author: Shuai Xue + * + * This software may be redistributed and/or modified under the terms of + * the GNU General Public License ("GPL") version 2 only as published by the + * Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/miscdevice.h> +#include <linux/pagewalk.h> +#include <asm/memory.h> + +typedef struct +{ + int num; + /* + * Any unaligned access to memory region with any Device memory type + * attribute generates an Alignment fault. Thus, add a safe padding. + */ + char pad[3]; + long long int paddr; +} mpgprot_drv_ctx; + +#define DEV_NAME "pgprot_drv" +static mpgprot_drv_ctx *sh_mem = NULL; +#define SHARE_MEM_SIZE (PAGE_SIZE * 2) + +static int pgprot = 0; +module_param(pgprot, int, 0644); +MODULE_PARM_DESC(pgprot, "Get an value from user...\n"); + +void dump_pte(const struct mm_struct *const mm, + const unsigned long addr) +{ + pgd_t *pgdp, pgd; + p4d_t *p4dp, p4d; + pud_t *pudp, pud; + pmd_t *pmdp, pmd; + pte_t *ptep, pte; + + pgdp = pgd_offset(mm, addr); + pgd = READ_ONCE(*pgdp); + printk("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); + + if (pgd_none(pgd) || pgd_bad(pgd)) + return; + + p4dp = p4d_offset(pgdp, addr); + p4d = READ_ONCE(*p4dp); + printk(", p4d=%016llx", p4d_val(p4d)); + if (p4d_none(p4d) || p4d_bad(p4d)) + return; + + pudp = pud_offset(p4dp, addr); + pud = READ_ONCE(*pudp); + printk(", pud=%016llx", pud_val(pud)); + if (pud_none(pud) || pud_bad(pud)) + return; + + pmdp = pmd_offset(pudp, addr); + pmd = READ_ONCE(*pmdp); + printk(", pmd=%016llx", pmd_val(pmd)); + if (pmd_none(pmd) || pmd_bad(pmd)) + return; + + ptep = pte_offset_map(pmdp, addr); + pte = 
READ_ONCE(*ptep); + printk(", pte=%016llx\n", pte_val(pte)); + if (pte_present(*ptep)) + printk(", pte present\n"); + pte_unmap(ptep); +} + +static int pgprot_drv_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + uint64_t pfn; + struct page *page = NULL; + unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start); + + if (size > SHARE_MEM_SIZE) + { + ret = -EINVAL; + goto err; + } + + sh_mem = (void *)__get_free_pages(GFP_KERNEL, 1); + if (!sh_mem) + { + printk("kmalloc error\n"); + goto err; + } + + pfn = virt_to_pfn(sh_mem); + printk("kmalloc pfn: %llx\n", pfn); + + sh_mem->num = 56; + sh_mem->paddr = pfn << PAGE_SHIFT; + + printk("vma->vm_start = %lx", vma->vm_start); + switch (pgprot) + { + case MT_NORMAL: + /* pgprot is MT_NORMAL by default */ + printk("Memory Atrr: MT_NORMAL\n"); + break; + case MT_NORMAL_TAGGED: + vma->vm_page_prot = pgprot_tagged(vma->vm_page_prot); + printk("Memory Atrr: MT_NORMAL_TAGGED\n"); + break; + case MT_NORMAL_NC: + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + printk("Memory Atrr: MT_NORMAL_NC\n"); + break; + case MT_DEVICE_nGnRnE: + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + printk("Memory Atrr: MT_DEVICE_nGnRnE\n"); + break; + case MT_DEVICE_nGnRE: + vma->vm_page_prot = pgprot_device(vma->vm_page_prot); + printk("Memory Atrr: MT_DEVICE_nGnRE\n"); + break; + default: + /* MT_NORMAL */ + printk("Memory Atrr: MT_NORMAL\n"); + break; + } + page = virt_to_page((unsigned long)sh_mem); + ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size, vma->vm_page_prot); + if (ret) + { + goto err; + } + + dump_pte(current->mm, vma->vm_start); + + return 0; + +err: + return ret; +} + +static struct file_operations pgprot_drv_fops = + { + .owner = THIS_MODULE, + .mmap = pgprot_drv_mmap, +}; + +static struct miscdevice pgprot_drv_dev = + { + .minor = MISC_DYNAMIC_MINOR, + .name = DEV_NAME, + .fops = &pgprot_drv_fops, +}; + +static int pgprot_drv_init(void) +{ + int ret; + + ret = 
misc_register(&pgprot_drv_dev); + if (ret) + { + printk("register misc device error\n"); + return ret; + } + + printk("register misc ok\n"); + + return 0; +} + +static void pgprot_drv_exit(void) +{ + misc_deregister(&pgprot_drv_dev); + free_pages((unsigned long)sh_mem, 1); +} + +module_init(pgprot_drv_init); +module_exit(pgprot_drv_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/memattr/test.c b/memattr/test.c new file mode 100644 index 0000000..5a83c68 --- /dev/null +++ b/memattr/test.c @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2022 Alibaba Corporation + * Author: Shuai Xue + * + * This software may be redistributed and/or modified under the terms of + * the GNU General Public License ("GPL") version 2 only as published by the + * Free Software Foundation. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <time.h> +#include <sys/mman.h> +#include <sys/time.h> +#include <setjmp.h> +#include <signal.h> +#define _GNU_SOURCE 1 +#define __USE_GNU 1 +#include <sched.h> +#include <errno.h> +#include <sys/syscall.h> +#include <linux/futex.h> + +typedef struct +{ + int num; + /* + * Any unaligned access to memory region with any Device memory type + * attribute generates an Alignment fault. Thus, add a safe padding. 
+ */ + char pad[3]; + long long int paddr; +} mpgprot_drv_ctx; + +#define DEV_NAME "/dev/pgprot_drv" +#define PAGE_SHIFT 12 +static mpgprot_drv_ctx *ctx = NULL; +static char *progname; + +#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type" +#define EINJ_ETYPE_AVAILABLE "/sys/kernel/debug/apei/einj/available_error_type" +#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1" +#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2" +#define EINJ_APIC "/sys/kernel/debug/apei/einj/param3" +#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags" +#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger" +#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject" +#define EINJ_VENDOR "/sys/kernel/debug/apei/einj/vendor" + +static void wfile(char *file, unsigned long long val) +{ + FILE *fp; + + fp = fopen(file, "w"); + if (fp == NULL) + { + fprintf(stderr, "%s: cannot open '%s'\n", progname, file); + exit(1); + } + fprintf(fp, "0x%llx\n", val); + if (fclose(fp) == EOF) + { + fprintf(stderr, "%s: write error on '%s'\n", progname, file); + exit(1); + } +} + +/* + * get information about address from /proc/self/pagemap + */ +unsigned long long vtop(unsigned long long addr) +{ + static int pagesize; + unsigned long long pinfo; + long offset; + int fd; + + if (pagesize == 0) + pagesize = getpagesize(); + offset = addr / pagesize * (sizeof pinfo); + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd == -1) + { + perror("pagemap"); + exit(1); + } + if (pread(fd, &pinfo, sizeof pinfo, offset) != sizeof pinfo) + { + perror("pagemap"); + exit(1); + } + close(fd); + if ((pinfo & (1ull << 63)) == 0) + { + printf("page not present\n"); + return ~0ull; + } + return ((pinfo & 0x007fffffffffffffull) * pagesize) + (addr & (pagesize - 1)); +} + +static void inject_uc(unsigned long long addr, void *vaddr, int notrigger) +{ + wfile(EINJ_ETYPE, 0x20); + wfile(EINJ_ADDR, addr); + wfile(EINJ_MASK, ~0x0ul); + wfile(EINJ_FLAGS, 2); + wfile(EINJ_NOTRIGGER, notrigger); + 
wfile(EINJ_DOIT, 1); +} + +int trigger_write(char *addr) +{ + addr[0] = 0x69; + return 0; +} + +#define ONE p = (char **)*p; +#define FIVE ONE ONE ONE ONE ONE +#define TEN FIVE FIVE +#define FIFTY TEN TEN TEN TEN TEN +#define HUNDRED FIFTY FIFTY + +static int poison = 0; +static int bench = 0; +int main(int argc, char *argv[]) +{ + int kfd, c; + long long paddr; + void *vaddr; + + progname = argv[0]; + while ((c = getopt(argc, argv, "pb")) != -1) + switch (c) + { + case 'p': + poison = 1; + break; + case 'b': + bench = 1; + break; + } + + kfd = open(DEV_NAME, O_RDWR | O_NDELAY); + if (kfd < 0) + { + printf("open file %s error\n", DEV_NAME); + return -1; + } + + vaddr = mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, kfd, 0); + if (vaddr == MAP_FAILED) + { + printf("allocate mem fail %d!!!\n", 4096); + exit(1); + } + + ctx = (mpgprot_drv_ctx *)vaddr; + printf("check ctx: vaddr = %p, num %d, paddr %llx\n", vaddr, ctx->num, ctx->paddr); + + if (bench) + { + struct timeval tv1, tv2; + int memsize = 4096; + int stride = 128; + int size = memsize / stride; + unsigned *indices = malloc(size * sizeof(int)); + int i, count, tmp; + struct timezone tz; + char *mem = vaddr; + unsigned long sec, usec; + + for (i = 0; i < size; i++) + indices[i] = i; + + // trick 2: fill mem with pointer references + for (i = 0; i < size - 1; i++) + *(char **)&mem[indices[i] * stride] = (char *)&mem[indices[i + 1] * stride]; + *(char **)&mem[indices[size - 1] * stride] = (char *)&mem[indices[0] * stride]; + + register char **p = (char **)mem; + tmp = count / 100; + + gettimeofday(&tv1, &tz); + for (i = 0; i < tmp; ++i) + { + HUNDRED; + } + gettimeofday(&tv2, &tz); + + if (tv2.tv_usec < tv1.tv_usec) + { + usec = 1000000 + tv2.tv_usec - tv1.tv_usec; + sec = tv2.tv_sec - tv1.tv_sec - 1; + } + else + { + usec = tv2.tv_usec - tv1.tv_usec; + sec = tv2.tv_sec - tv1.tv_sec; + } + + /* touch pointer p to prevent compiler optimization */ + char **touch = p; + printf("Buffer size: %ld KB, stride %d, time 
%d.%06d s, latency %.2f ns\n", + memsize / 1024, stride, sec, usec, (sec * 1000000 + usec) * 1000.0 / (tmp * 100)); + } + + if (poison) + { + /* pick from kernel */ + long long int paddr = ctx->paddr; + printf("vaddr = %p paddr = %llx\n", vaddr, paddr); + inject_uc(paddr, vaddr, 1); + sleep(3); + trigger_write(vaddr); + } + + munmap(ctx, 4096); + + return 0; +} diff --git a/ras-tolerance/Makefile b/ras-tolerance/Makefile new file mode 100644 index 0000000..4f1e7cd --- /dev/null +++ b/ras-tolerance/Makefile @@ -0,0 +1,11 @@ +ifneq ($(KERNELRELEASE),) + obj-m:=ras_tolerance.o +else + KDIR := /lib/modules/$(shell uname -r)/build/ +CFLAGS= -O0 +PWD:=$(shell pwd) +all: + make -C $(KDIR) M=$(PWD) modules +clean: + rm -f *.ko *.o *.symvers *.cmd *.cmd.o +endif diff --git a/ras-tolerance/README b/ras-tolerance/README new file mode 100644 index 0000000..a799c0c --- /dev/null +++ b/ras-tolerance/README @@ -0,0 +1,9 @@ +ras-tolerance + +When a hardware error occurs for a non corrected ras event the kernel can take +different actions. If the severity is fatal, the kernel panic immediately. + +This driver allows to overwrite error severity to a lower level at runtime, +recoverable by default. It is useful for test. + +NOTE: ONLY FOR TEST. Be sure what your are doing.
\ No newline at end of file diff --git a/ras-tolerance/ras_tolerance.c b/ras-tolerance/ras_tolerance.c new file mode 100644 index 0000000..4e6094b --- /dev/null +++ b/ras-tolerance/ras_tolerance.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2022 Alibaba Corporation + * Author: Shuai Xue + * + * This software may be redistributed and/or modified under the terms of + * the GNU General Public License ("GPL") version 2 only as published by the + * Free Software Foundation. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ +#define GHES_PFX "GHES: " + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> +#include <linux/io.h> +#include <linux/cper.h> + +#include <acpi/ghes.h> +#include <asm/fixmap.h> + +#include <linux/moduleparam.h> +#include <linux/init.h> + +#include <linux/io.h> +#include <linux/interrupt.h> +#include <linux/timer.h> +#include <linux/cper.h> +#include <linux/platform_device.h> +#include <linux/mutex.h> +#include <linux/ratelimit.h> +#include <linux/vmalloc.h> +#include <linux/irq_work.h> +#include <linux/llist.h> +#include <linux/genalloc.h> +#include <linux/pci.h> +#include <linux/pfn.h> +#include <linux/aer.h> +#include <linux/nmi.h> +#include <linux/sched/clock.h> +#include <linux/uuid.h> +#include <linux/ras.h> +#include <linux/task_work.h> + +#include <acpi/actbl1.h> +#include <acpi/ghes.h> +#include <acpi/apei.h> +#include <asm/fixmap.h> +#include <asm/tlbflush.h> +#include <ras/ras_event.h> +#include <linux/acpi.h> + +#define LOOKUP_SYMS_EX(name, sym) \ + do \ + { \ + orig_##name = (void *)kallsyms_lookup_name(sym); \ + if (!orig_##name) \ + { \ + pr_err("kallsyms_lookup_name: %s\n", #name); \ + return -EINVAL; \ + } \ + } while (0) + +#define LOOKUP_SYMS(name) LOOKUP_SYMS_EX(name, #name) + +#define MAX_SYMBOL_LEN 64 +static char symbol[MAX_SYMBOL_LEN] = "ghes_in_nmi_queue_one_entry"; +module_param_string(symbol, symbol, sizeof(symbol), 0644); + +extern int apei_read(u64 *val, struct acpi_generic_address *reg); +extern 
void cper_estatus_print(const char *pfx, + const struct acpi_hest_generic_status *estatus); +extern int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus); +extern int cper_estatus_check(const struct acpi_hest_generic_status *estatus); + +static void (*orig_ghes_copy_tofrom_phys)(void *buffer, u64 paddr, u32 len, + int from_phys, + enum fixed_addresses fixmap_idx); + +/* Read the CPER block, returning its address, and header in estatus. */ +static int __ghes_peek_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + struct acpi_hest_generic *g = ghes->generic; + int rc; + + rc = apei_read(buf_paddr, &g->error_status_address); + if (rc) + { + *buf_paddr = 0; + pr_warn_ratelimited(FW_WARN GHES_PFX + "Failed to read error status block address for hardware error source: %d.\n", + g->header.source_id); + return -EIO; + } + if (!*buf_paddr) + return -ENOENT; + + orig_ghes_copy_tofrom_phys(estatus, *buf_paddr, sizeof(*estatus), 1, + fixmap_idx); + if (!estatus->block_status) + { + *buf_paddr = 0; + return -ENOENT; + } + + return 0; +} + +static int __ghes_read_estatus(struct acpi_hest_generic_status *estatus, + u64 buf_paddr, enum fixed_addresses fixmap_idx, + size_t buf_len) +{ + orig_ghes_copy_tofrom_phys(estatus, buf_paddr, buf_len, 1, fixmap_idx); + if (cper_estatus_check(estatus)) + { + pr_warn_ratelimited(FW_WARN GHES_PFX + "Failed to read error status block!\n"); + return -EIO; + } + + return 0; +} + +static inline u32 cper_estatus_len(struct acpi_hest_generic_status *estatus) +{ + if (estatus->raw_data_length) + return estatus->raw_data_offset + + estatus->raw_data_length; + else + return sizeof(*estatus) + estatus->data_length; +} + +/* Check the top-level record header has an appropriate size. 
*/ +static int __ghes_check_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus) +{ + u32 len = cper_estatus_len(estatus); + + if (len < sizeof(*estatus)) + { + pr_warn_ratelimited(FW_WARN GHES_PFX "Truncated error status block!\n"); + return -EIO; + } + + if (len > ghes->generic->error_block_length) + { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid error status block length!\n"); + return -EIO; + } + + if (cper_estatus_check_header(estatus)) + { + pr_warn_ratelimited(FW_WARN GHES_PFX "Invalid CPER header!\n"); + return -EIO; + } + + return 0; +} + +static int ghes_read_estatus(struct ghes *ghes, + struct acpi_hest_generic_status *estatus, + u64 *buf_paddr, enum fixed_addresses fixmap_idx) +{ + int rc; + + rc = __ghes_peek_estatus(ghes, estatus, buf_paddr, fixmap_idx); + if (rc) + return rc; + + rc = __ghes_check_estatus(ghes, estatus); + if (rc) + return rc; + + return __ghes_read_estatus(estatus, *buf_paddr, fixmap_idx, + cper_estatus_len(estatus)); +} + +/* For each probe you need to allocate a kprobe structure */ +static struct kprobe kp = { + .symbol_name = symbol, +}; + +/* kprobe pre_handler: called just before the probed instruction is executed */ +static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) +{ + struct acpi_hest_generic_status *estatus; + struct ghes *ghes; + u64 buf_paddr; + int rc; + __u16 new_severity = CPER_SEV_RECOVERABLE; + __u16 old_severity; + + ghes = (struct ghes *)regs_get_register(regs, 0); + estatus = ghes->estatus; + + rc = ghes_read_estatus(ghes, estatus, &buf_paddr, FIX_APEI_GHES_IRQ); + + old_severity = estatus->error_severity; + estatus->error_severity = CPER_SEV_RECOVERABLE; + pr_info("<%s> Overwrite %s => %s\n", p->symbol_name, + cper_severity_str(old_severity), + cper_severity_str(new_severity)); + + orig_ghes_copy_tofrom_phys(estatus, buf_paddr, sizeof(*estatus), 0, + FIX_APEI_GHES_IRQ); + + /* A dump_stack() here will give a stack backtrace */ + return 0; +} + +/* kprobe post_handler: 
called after the probed instruction is executed */ +static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + pr_info("<%s> p->addr = 0x%p, pstate = 0x%lx\n", + p->symbol_name, p->addr, (long)regs->pstate); +} + +static int __init kprobe_init(void) +{ + int ret; + kp.pre_handler = handler_pre; + kp.post_handler = handler_post; + + ret = register_kprobe(&kp); + if (ret < 0) + { + pr_err("register_kprobe failed, returned %d\n", ret); + return ret; + } + pr_info("Planted kprobe at %s (%p)\n", kp.symbol_name, kp.addr); + LOOKUP_SYMS(ghes_copy_tofrom_phys); + return 0; +} + +static void __exit kprobe_exit(void) +{ + unregister_kprobe(&kp); + pr_info("kprobe at %s (%p) unregistered\n", kp.symbol_name, kp.addr); +} + +module_init(kprobe_init) + module_exit(kprobe_exit) + MODULE_LICENSE("GPL"); |