aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShuai Xue <xueshuai@linux.alibaba.com>2023-02-13 10:02:21 +0800
committerTony Luck <tony.luck@intel.com>2023-03-06 13:03:40 -0800
commit56c34a74bb5946e153fd8529dba87e2adca57f5d (patch)
tree3cf9737bc8b1c9a64eaa87f194a159903037b11e
parent507960843a49fb955bc4b4e6548618eec592ec09 (diff)
downloadras-tools-56c34a74bb5946e153fd8529dba87e2adca57f5d.tar.gz
einj_mem_uc: add extra arguments to support guest error injection
To support Guest Error injection, add two extra arguments: - '-j': skip error injection, this step should do with host physical address on host which creates GPA->HPA mappings for the guest. - '-k': kick off trigger by writing a file from remote (host). The steps to inject guest error are: STEP 1: start a VM with a stdio monitor which allows giving complex commands to the QEMU emulator. qemu-system-aarch64 -enable-kvm \ -cpu host \ -M virt,gic-version=3 \ -m 8G \ -d guest_errors \ -rtc base=localtime,clock=host \ -smp cores=2,threads=2,sockets=2 \ -object memory-backend-ram,id=mem0,size=4G \ -object memory-backend-ram,id=mem1,size=4G \ -numa node,memdev=mem0,cpus=0-3,nodeid=0 \ -numa node,memdev=mem1,cpus=4-7,nodeid=1 \ -bios /usr/share/AAVMF/AAVMF_CODE.fd \ -drive driver=qcow2,media=disk,cache=writeback,if=virtio,id=alinu1_rootfs,file=/path/to/image.qcow2 \ -netdev user,id=n1,hostfwd=tcp::5555-:22 \ -serial telnet:localhost:4321,server,nowait \ -device virtio-net-pci,netdev=n1 \ -monitor stdio QEMU 7.2.0 monitor - type 'help' for more information (qemu) VNC server running on 127.0.0.1:5900 STEP 2: login guest and install ras-tools, then run `einj_mem_uc` to allocate a page in userspace, dumps the virtual and physical address of the page. The `-j` is to skip error injection and `-k` is to wait for a kick. $ ./einj_mem_uc single -j -k 0: single vaddr = 0xffffbd88c400 paddr = 151f21400 STEP 3: run command `gpa2hpa` in QEMU monitor and it will print the host physical address at which the guest's physical address addr is mapped. (qemu) gpa2hpa 0x151f21400 Host physical address for 0x151f21400 (mem1) is 0x935757400 STEP 4: inject an uncorrected error via the APEI interface to the finally translated host physical address on host. 
echo 0x949a84400 > /sys/kernel/debug/apei/einj/param1 echo 0xfffffffffffff000 > /sys/kernel/debug/apei/einj/param2 echo 0x0 > /sys/kernel/debug/apei/einj/flags echo 0x10 > /sys/kernel/debug/apei/einj/error_type echo 1 > /sys/kernel/debug/apei/einj/notrigger echo 1 > /sys/kernel/debug/apei/einj/error_inject STEP 5: then kick `einj_mem_uc` to trigger the error by writing "trigger_start". In this example, the kick is done on host. ssh -p 5555 root@localhost "echo trigger > ~/trigger_start" STEP 6: We will observe that the QEMU process exit. (qemu) qemu-system-aarch64: Hardware memory error! Signed-off-by: zhangyangzeyu.zyzy <xiaoque@linux.alibaba.com> Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--README71
-rw-r--r--einj_mem_uc.c52
2 files changed, 117 insertions, 6 deletions
diff --git a/README b/README
index da17edd..a896ab9 100644
--- a/README
+++ b/README
@@ -19,3 +19,74 @@ Arm platform specific drivers:
- memattr: a test suit to poison specific memory attribute.
- ras-tolerance: a driver to overwrite error severity to a lower level at runtime.
+
+Virtualization:
+
+Injecting errors into guests is a rather manual process. You can run einj_mem_uc
+inside the guest with special arguments to skip the injection, but still print
+the guest physical address. Then on the host convert that to a host physical
+address and inject. Finally have the process on the guest consume the error.
+
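+The special arguments are:
+
+- '-j': skip error injection. The injection must instead be done on the host,
+ using the host physical address, because the host creates the GPA->HPA
+ mappings for the guest.
+- '-k': wait for a kick before triggering the error. The kick is given by
+ writing a file from a remote machine (the host).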
+
+The steps to inject guest error are:
+
+STEP 1: start a VM with a stdio monitor which allows giving complex
+commands to the QEMU emulator.
+
+ qemu-system-aarch64 -enable-kvm \
+ -cpu host \
+ -M virt,gic-version=3 \
+ -m 8G \
+ -d guest_errors \
+ -rtc base=localtime,clock=host \
+ -smp cores=2,threads=2,sockets=2 \
+ -object memory-backend-ram,id=mem0,size=4G \
+ -object memory-backend-ram,id=mem1,size=4G \
+ -numa node,memdev=mem0,cpus=0-3,nodeid=0 \
+ -numa node,memdev=mem1,cpus=4-7,nodeid=1 \
+ -bios /usr/share/AAVMF/AAVMF_CODE.fd \
+ -drive driver=qcow2,media=disk,cache=writeback,if=virtio,id=alinu1_rootfs,file=/path/to/image.qcow2 \
+ -netdev user,id=n1,hostfwd=tcp::5555-:22 \
+ -serial telnet:localhost:4321,server,nowait \
+ -device virtio-net-pci,netdev=n1 \
+ -monitor stdio
+ QEMU 7.2.0 monitor - type 'help' for more information
+ (qemu) VNC server running on 127.0.0.1:5900
+
+STEP 2: log in to the guest and install ras-tools, then run `einj_mem_uc`. It
+allocates a page in userspace and prints the virtual and physical addresses of
+the page. `-j` skips error injection and `-k` makes it wait for a kick.
+
+ $ ./einj_mem_uc single -j -k
+ 0: single vaddr = 0xffffbd88c400 paddr = 151f21400
+
+STEP 3: run the `gpa2hpa` command in the QEMU monitor. It prints the host
+physical address to which the given guest physical address is mapped.
+
+ (qemu) gpa2hpa 0x151f21400
+ Host physical address for 0x151f21400 (mem1) is 0x935757400
+
+STEP 4: on the host, inject an uncorrected error via the APEI EINJ interface
+at the host physical address obtained in STEP 3.
+
+ echo 0x935757400 > /sys/kernel/debug/apei/einj/param1
+ echo 0xfffffffffffff000 > /sys/kernel/debug/apei/einj/param2
+ echo 0x0 > /sys/kernel/debug/apei/einj/flags
+ echo 0x10 > /sys/kernel/debug/apei/einj/error_type
+ echo 1 > /sys/kernel/debug/apei/einj/notrigger
+ echo 1 > /sys/kernel/debug/apei/einj/error_inject
+
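+STEP 5: then kick `einj_mem_uc` to trigger the error by writing the string
+"trigger" to the file `trigger_start` in the directory where `einj_mem_uc` was
+started (root's home directory in this example). Here the kick is done from
+the host.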
+
+ ssh -p 5555 root@localhost "echo trigger > ~/trigger_start"
+
+STEP 6: We will observe that the QEMU process exits.
+
+ (qemu) qemu-system-aarch64: Hardware memory error!
+
diff --git a/einj_mem_uc.c b/einj_mem_uc.c
index acc64fd..88764d0 100644
--- a/einj_mem_uc.c
+++ b/einj_mem_uc.c
@@ -1057,7 +1057,7 @@ static void show_help(void)
{
struct test *t;
- printf("Usage: %s [-a][-c count][-d delay][-f][-i] [-m runup:size:align][testname]\n", progname);
+ printf("Usage: %s [-a][-c count][-d delay][-f][-i][j][k] [-m runup:size:align][testname]\n", progname);
printf(" %-8s %-5s %s\n", "Testname", "Fatal", "Description");
for (t = tests; t->testname; t++)
printf(" %-8s %-5s %s\n", t->testname,
@@ -1098,10 +1098,40 @@ struct sigaction recover_act = {
.sa_flags = SA_SIGINFO,
};
+/*
+ * Wait for an external "kick" and then consume the error at addr.
+ *
+ * Any stale "./trigger_start" file is removed first. The file is then polled
+ * about once per second, for at most 64*3 attempts, until it exists and its
+ * leading bytes contain the string "trigger". The test's trigger callback is
+ * invoked afterwards in every case, i.e. also when the poll budget runs out
+ * without a kick.
+ *
+ * t:    test whose trigger callback is invoked
+ * addr: address passed to the trigger callback
+ *
+ * Exits with status 1 if a stale trigger file exists but cannot be removed.
+ */
+void kick_by_file(struct test *t, char *addr) {
+	const char *trigger = "./trigger_start";
+	const char *trigger_flag = "trigger";
+	char trigger_buf[16];
+	int count = 64*3;
+	int fd;
+	ssize_t n;
+
+	errno = 0;
+	if (unlink(trigger) < 0 && errno != ENOENT) {
+		fprintf(stderr, "fail to remove trigger file\n");
+		exit(1);
+	}
+
+	while (count--) {
+		fd = open(trigger, O_RDONLY);
+		if (fd >= 0) {
+			/* Keep one byte free so the buffer can always be NUL-terminated. */
+			n = read(fd, trigger_buf, sizeof(trigger_buf) - 1);
+			/* Close on every attempt; the file is reopened on the next poll. */
+			close(fd);
+			if (n > 0) {
+				trigger_buf[n] = '\0';
+				if (strstr(trigger_buf, trigger_flag) != NULL)
+					break;
+			}
+		}
+		sleep(1);
+	}
+
+	/* trigger now */
+	t->trigger(addr);
+}
+
int main(int argc, char **argv)
{
int c, i;
- int count = 1;
+ int count = 1, kick = 0, inject_skip_flag = 0;
double delay = 1.0;
struct test *t;
void *vaddr;
@@ -1118,7 +1148,7 @@ int main(int argc, char **argv)
pagesize = getpagesize();
pid = getpid();
- while ((c = getopt(argc, argv, "ac:d:fhim:z:S")) != -1) switch (c) {
+ while ((c = getopt(argc, argv, "ac:d:fhijkm:z:S")) != -1) switch (c) {
case 'a':
all_flag = 1;
break;
@@ -1134,6 +1164,12 @@ int main(int argc, char **argv)
case 'i':
cmci_skip_flag = 1;
break;
+ case 'j':
+ inject_skip_flag = 1;
+ break;
+ case 'k':
+ kick = 1;
+ break;
case 'm':
parse_memcpy(optarg);
break;
@@ -1148,7 +1184,7 @@ int main(int argc, char **argv)
break;
}
- if (Sflag == 0)
+ if (Sflag == 0 && inject_skip_flag == 0)
check_configuration();
if (optind < argc)
@@ -1178,9 +1214,13 @@ int main(int argc, char **argv)
printf("Unexpected SIGBUS\n");
}
} else {
- t->inject(paddr, vaddr, t->notrigger);
+ if (!inject_skip_flag)
+ t->inject(paddr, vaddr, t->notrigger);
sleep(3);
- t->trigger(vaddr);
+ if (kick)
+ kick_by_file(t, vaddr);
+ else
+ t->trigger(vaddr);
if (t->flags & F_SIGBUS) {
printf("Expected SIGBUS, didn't get one\n");
}