author     Marc Zyngier <marc.zyngier@arm.com>  2012-07-04 16:49:31 +0100
committer  Marc Zyngier <marc.zyngier@arm.com>  2012-07-04 16:49:31 +0100
commit     fd33ed44aa9a5ad92cf0fa25fc0921a34f298402
tree       c27c4a37c30437941cd4b1eb2eee2028dd37cf7b
parent     6887a4131da3adaab011613776d865f4bcfb5678
parent     d5321dceeaccf756755e76b38d8b5905bd99d250
download   linux-kvm-arm-kvm-a15-v10-stage-v3.5-rc5.tar.gz

Merge branch 'kvm-a15-v10-stage' into kvm-a15-v10-stage-v3.5-rc (kvm-a15-v10-stage-v3.5-rc5)
-rw-r--r--  Documentation/virtual/kvm/api.txt | 48
-rw-r--r--  Documentation/virtual/kvm/msr.txt | 33
-rw-r--r--  Documentation/virtual/kvm/ppc-pv.txt | 2
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/Makefile | 1
-rw-r--r--  arch/arm/include/asm/idmap.h | 7
-rw-r--r--  arch/arm/include/asm/kvm.h | 89
-rw-r--r--  arch/arm/include/asm/kvm_arm.h | 184
-rw-r--r--  arch/arm/include/asm/kvm_asm.h | 58
-rw-r--r--  arch/arm/include/asm/kvm_emulate.h | 120
-rw-r--r--  arch/arm/include/asm/kvm_host.h | 159
-rw-r--r--  arch/arm/include/asm/kvm_mmu.h | 41
-rw-r--r--  arch/arm/include/asm/mach/map.h | 1
-rw-r--r--  arch/arm/include/asm/pgtable-3level-hwdef.h | 5
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h | 13
-rw-r--r--  arch/arm/include/asm/pgtable.h | 5
-rw-r--r--  arch/arm/include/asm/unified.h | 12
-rw-r--r--  arch/arm/kernel/armksyms.c | 7
-rw-r--r--  arch/arm/kernel/asm-offsets.c | 43
-rw-r--r--  arch/arm/kernel/entry-armv.S | 1
-rw-r--r--  arch/arm/kernel/vmlinux.lds.S | 6
-rw-r--r--  arch/arm/kvm/Kconfig | 45
-rw-r--r--  arch/arm/kvm/Makefile | 17
-rw-r--r--  arch/arm/kvm/arm.c | 881
-rw-r--r--  arch/arm/kvm/emulate.c | 873
-rw-r--r--  arch/arm/kvm/exports.c | 38
-rw-r--r--  arch/arm/kvm/guest.c | 165
-rw-r--r--  arch/arm/kvm/init.S | 149
-rw-r--r--  arch/arm/kvm/interrupts.S | 698
-rw-r--r--  arch/arm/kvm/mmu.c | 644
-rw-r--r--  arch/arm/kvm/reset.c | 133
-rw-r--r--  arch/arm/kvm/trace.h | 117
-rw-r--r--  arch/arm/mm/Kconfig | 10
-rw-r--r--  arch/arm/mm/idmap.c | 88
-rw-r--r--  arch/arm/mm/mmu.c | 9
-rw-r--r--  arch/ia64/include/asm/kvm.h | 1
-rw-r--r--  arch/ia64/kvm/Kconfig | 1
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 33
-rw-r--r--  arch/powerpc/include/asm/epapr_hcalls.h | 2
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s_64.h | 7
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 6
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 3
-rw-r--r--  arch/powerpc/kernel/Makefile | 1
-rw-r--r--  arch/powerpc/kernel/epapr_hcalls.S | 25
-rw-r--r--  arch/powerpc/kernel/epapr_paravirt.c | 52
-rw-r--r--  arch/powerpc/kernel/kvm.c | 28
-rw-r--r--  arch/powerpc/kernel/kvm_emul.S | 12
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 123
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 40
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 5
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 15
-rw-r--r--  arch/powerpc/kvm/booke.c | 5
-rw-r--r--  arch/powerpc/kvm/booke_emulate.c | 3
-rw-r--r--  arch/powerpc/kvm/e500_emulate.c | 3
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 18
-rw-r--r--  arch/powerpc/platforms/Kconfig | 9
-rw-r--r--  arch/s390/include/asm/sclp.h | 2
-rw-r--r--  arch/s390/kernel/setup.c | 12
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 1
-rw-r--r--  arch/x86/include/asm/bitops.h | 7
-rw-r--r--  arch/x86/include/asm/kvm.h | 1
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 12
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 7
-rw-r--r--  arch/x86/include/asm/vmx.h | 4
-rw-r--r--  arch/x86/kernel/kvm.c | 57
-rw-r--r--  arch/x86/kvm/cpuid.c | 1
-rw-r--r--  arch/x86/kvm/lapic.c | 194
-rw-r--r--  arch/x86/kvm/lapic.h | 11
-rw-r--r--  arch/x86/kvm/mmu.c | 59
-rw-r--r--  arch/x86/kvm/svm.c | 6
-rw-r--r--  arch/x86/kvm/trace.h | 34
-rw-r--r--  arch/x86/kvm/vmx.c | 92
-rw-r--r--  arch/x86/kvm/x86.c | 113
-rw-r--r--  drivers/s390/char/sclp.c | 10
-rw-r--r--  drivers/s390/char/sclp.h | 10
-rw-r--r--  drivers/s390/char/sclp_cmd.c | 38
-rw-r--r--  drivers/s390/kvm/kvm_virtio.c | 3
-rw-r--r--  include/linux/kvm.h | 4
-rw-r--r--  include/linux/kvm_host.h | 28
-rw-r--r--  include/trace/events/kvm.h | 7
-rw-r--r--  mm/memory.c | 2
-rw-r--r--  virt/kvm/kvm_main.c | 53
83 files changed, 5585 insertions(+), 281 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 930126698a0f5..79c10fced19d8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -614,15 +614,19 @@ only go to the IOAPIC. On ia64, an IOSAPIC is created.
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86, ia64, arm
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error
Sets the level of a GSI input to the interrupt controller model in the kernel.
-Requires that an interrupt controller model has been previously created with
-KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level
-to be set to 1 and then back to 0.
+On some architectures it is required that an interrupt controller model has
+been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered
+interrupts require the level to be set to 1 and then back to 0.
+
+ARM uses two types of interrupt lines per CPU: IRQ and FIQ. The value of the
+irq field should be (vcpu_index << 1) for IRQs and ((vcpu_index << 1) | 1) for
+FIQs. Level is used to raise/lower the line.
struct kvm_irq_level {
union {
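
For illustration, a minimal userspace sketch of the ARM encoding described
above (vm_fd and pulse_vcpu_irq are hypothetical names; only KVM_IRQ_LINE and
struct kvm_irq_level come from the ABI):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Pulse the IRQ line of the given VCPU: raise it, then lower it. */
	static int pulse_vcpu_irq(int vm_fd, unsigned int vcpu_idx)
	{
		struct kvm_irq_level irq = {
			/* (vcpu_index << 1) selects IRQ; OR in 1 for FIQ */
			.irq	= vcpu_idx << 1,
			.level	= 1,			/* raise the line */
		};

		if (ioctl(vm_fd, KVM_IRQ_LINE, &irq) < 0)
			return -1;

		irq.level = 0;				/* lower it again */
		return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
	}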
@@ -1930,6 +1934,42 @@ The "pte_enc" field provides a value that can be OR'ed into the hash
PTE's RPN field (i.e., it needs to be shifted left by 12 to OR it
into the hash PTE second double word).
+
+4.75 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface. This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization. Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error. The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46. On successful return from the
+ioctl, it will have been updated with the order of the hash table that
+was allocated.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+the kernel will clear out the existing hash table (zero all HPTEs) and
+return the hash table order in the parameter. (If the guest is using
+the virtualized real-mode area (VRMA) facility, the kernel will
+re-create the VRMA HPTEs on the next KVM_RUN of any vcpu.)
+
+
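
As a sketch, requesting the hash table from userspace could look as follows
(vm_fd is a hypothetical, already-open VM descriptor; the order value 24 is
just an example, i.e. a 2^24 = 16 MB table):

	__u32 order = 24;	/* log2 of the table size, in [18, 46] */

	/* Must be issued before any vcpu runs, or it fails with EBUSY. */
	if (ioctl(vm_fd, KVM_PPC_ALLOCATE_HTAB, &order) < 0)
		perror("KVM_PPC_ALLOCATE_HTAB");
	else
		printf("allocated HTAB of order %u\n", order);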
5. The kvm_run structure
------------------------
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 96b41bd975233..730471048583b 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -223,3 +223,36 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
steal: the amount of time in which this vCPU did not run, in
nanoseconds. Time during which the vcpu is idle, will not be
reported as steal time.
+
+MSR_KVM_EOI_EN: 0x4b564d04
+ data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
+ when disabled. Bit 1 is reserved and must be zero. When PV end of
+ interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
+ physical address of a 4 byte memory area which must be in guest RAM and
+ must be zeroed.
+
+ The least significant bit of this 4 byte memory location will be
+ written to by the hypervisor, typically at the time of interrupt
+ injection. A value of 1 means that the guest can skip writing EOI to
+ the APIC (using an MSR or MMIO write); instead, it is sufficient to
+ signal EOI by clearing the bit in guest memory - this location will
+ later be polled by the hypervisor.
+ A value of 0 means that the EOI write is required.
+
+ It is always safe for the guest to ignore the optimization and perform
+ the APIC EOI write anyway.
+
+ The hypervisor is guaranteed to modify this least significant bit
+ only while in the current VCPU context; this means that the guest
+ does not need to use either a lock prefix or memory ordering
+ primitives to synchronise with the hypervisor.
+
+ However, the hypervisor can set and clear this memory bit at any
+ time. Therefore, to make sure the hypervisor does not interrupt the
+ guest and clear the least significant bit in the window between the
+ guest testing it (to detect whether the EOI APIC write can be
+ skipped) and the guest clearing it (to signal EOI to the hypervisor),
+ the guest must both read the least significant bit of the memory area
+ and clear it using a single CPU instruction, such as test-and-clear
+ or compare-and-exchange.
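
To make the single-instruction requirement concrete, here is one possible
guest-side helper (an x86 sketch, not code from this patch; the "=@ccc" flag
output constraint requires a reasonably recent GCC):

	#include <stdbool.h>

	/*
	 * Read and clear bit 0 of the PV EOI word in one instruction (BTR),
	 * so there is no window between the test and the clear for the
	 * hypervisor to hit.
	 */
	static inline bool pv_eoi_test_and_clear(unsigned long *word)
	{
		bool was_set;

		asm volatile("btrl $0, %[w]"
			     : [w] "+m" (*word), "=@ccc" (was_set));
		return was_set;
	}

If the helper returns true, the clear itself has signalled EOI and the APIC
write can be skipped; if false, the guest must perform the normal APIC EOI
write.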
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
index 6e7c370509304..4911cf95c67e5 100644
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -109,8 +109,6 @@ The following bits are safe to be set inside the guest:
MSR_EE
MSR_RI
- MSR_CR
- MSR_ME
If any other bit changes in the MSR, please still use mtmsr(d).
diff --git a/MAINTAINERS b/MAINTAINERS
index eb22272b2116c..d5e291b00ae8a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3990,8 +3990,8 @@ F: arch/ia64/include/asm/kvm*
F: arch/ia64/kvm/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
-M: Carsten Otte <cotte@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
+M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a91009c618706..3fb62444882bd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2274,3 +2274,5 @@ source "security/Kconfig"
source "crypto/Kconfig"
source "lib/Kconfig"
+
+source "arch/arm/kvm/Kconfig"
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 0298b00fe2413..64f1e167de26d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -250,6 +250,7 @@ core-$(CONFIG_VFP) += arch/arm/vfp/
# If we have a machine-specific directory, then include it in the build.
core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/
core-y += arch/arm/net/
+core-y += arch/arm/kvm/
core-y += $(machdirs) $(platdirs)
drivers-$(CONFIG_OPROFILE) += arch/arm/oprofile/
diff --git a/arch/arm/include/asm/idmap.h b/arch/arm/include/asm/idmap.h
index bf863edb517dd..a1ab8d68874cb 100644
--- a/arch/arm/include/asm/idmap.h
+++ b/arch/arm/include/asm/idmap.h
@@ -11,4 +11,11 @@ extern pgd_t *idmap_pgd;
void setup_mm_for_reboot(void);
+#ifdef CONFIG_ARM_VIRT_EXT
+extern pgd_t *hyp_pgd;
+
+void hyp_idmap_teardown(void);
+void hyp_idmap_setup(void);
+#endif
+
#endif /* __ASM_IDMAP_H */
diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
new file mode 100644
index 0000000000000..54f301d3c445b
--- /dev/null
+++ b/arch/arm/include/asm/kvm.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#include <asm/types.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+
+/*
+ * KVM_IRQ_LINE irq field values to set/read IRQ/FIQ for a specific VCPU index.
+ */
+enum KVM_ARM_IRQ_LINE_TYPE {
+ KVM_ARM_IRQ_LINE = 0,
+ KVM_ARM_FIQ_LINE = 1,
+};
+
+/*
+ * Modes used for shorthand mode determination in the world-switch code and
+ * in emulation code.
+ *
+ * Note: These indices do NOT correspond to the value of the CPSR mode bits!
+ */
+enum vcpu_mode {
+ MODE_FIQ = 0,
+ MODE_IRQ,
+ MODE_SVC,
+ MODE_ABT,
+ MODE_UND,
+ MODE_USR,
+ MODE_SYS
+};
+
+struct kvm_regs {
+ __u32 regs0_7[8]; /* Unbanked regs. (r0 - r7) */
+ __u32 fiq_regs8_12[5]; /* Banked fiq regs. (r8 - r12) */
+ __u32 usr_regs8_12[5]; /* Banked usr registers (r8 - r12) */
+ __u32 reg13[6]; /* Banked r13 (SP), indexed by MODE_ */
+ __u32 reg14[6]; /* Banked r14 (LR), indexed by MODE_ */
+ __u32 reg15;
+ __u32 cpsr;
+ __u32 spsr[5]; /* Banked SPSR, indexed by MODE_ */
+ struct {
+ __u32 c0_midr;
+ __u32 c1_sys;
+ __u32 c2_base0;
+ __u32 c2_base1;
+ __u32 c2_control;
+ __u32 c3_dacr;
+ } cp15;
+
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+};
+
+struct kvm_arch_memory_slot {
+};
+
+#endif /* __ARM_KVM_H__ */
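
For illustration, userspace can read this register block with the standard
KVM_GET_REGS vcpu ioctl. A sketch (vcpu_fd is a hypothetical, already-open
VCPU descriptor):

	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;

	/* The banked arrays are indexed by the MODE_* values above. */
	printf("r0=%08x pc=%08x cpsr=%08x sp_svc=%08x\n",
	       regs.regs0_7[0], regs.reg15, regs.cpsr,
	       regs.reg13[MODE_SVC]);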
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
new file mode 100644
index 0000000000000..1efa452ae6142
--- /dev/null
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_ARM_H__
+#define __ARM_KVM_ARM_H__
+
+#include <asm/types.h>
+
+/* Supported Processor Types */
+#define CORTEX_A15 (0xC0F)
+
+/* Multiprocessor Affinity Register */
+#define MPIDR_CPUID (0x3 << 0)
+
+/* Hyp Configuration Register (HCR) bits */
+#define HCR_TGE (1 << 27)
+#define HCR_TVM (1 << 26)
+#define HCR_TTLB (1 << 25)
+#define HCR_TPU (1 << 24)
+#define HCR_TPC (1 << 23)
+#define HCR_TSW (1 << 22)
+#define HCR_TAC (1 << 21)
+#define HCR_TIDCP (1 << 20)
+#define HCR_TSC (1 << 19)
+#define HCR_TID3 (1 << 18)
+#define HCR_TID2 (1 << 17)
+#define HCR_TID1 (1 << 16)
+#define HCR_TID0 (1 << 15)
+#define HCR_TWE (1 << 14)
+#define HCR_TWI (1 << 13)
+#define HCR_DC (1 << 12)
+#define HCR_BSU (3 << 10)
+#define HCR_BSU_IS (1 << 10)
+#define HCR_FB (1 << 9)
+#define HCR_VA (1 << 8)
+#define HCR_VI_BIT_NR 7
+#define HCR_VF_BIT_NR 6
+#define HCR_VI (1 << HCR_VI_BIT_NR)
+#define HCR_VF (1 << HCR_VF_BIT_NR)
+#define HCR_AMO (1 << 5)
+#define HCR_IMO (1 << 4)
+#define HCR_FMO (1 << 3)
+#define HCR_PTW (1 << 2)
+#define HCR_SWIO (1 << 1)
+#define HCR_VM 1
+
+/*
+ * The bits we set in HCR:
+ * TAC: Trap ACTLR
+ * TSC: Trap SMC
+ * TSW: Trap cache operations by set/way
+ * TWI: Trap WFI
+ * BSU_IS: Upgrade barriers to the inner shareable domain
+ * FB: Force broadcast of all maintenance operations
+ * AMO: Override CPSR.A and enable signaling with VA
+ * IMO: Override CPSR.I and enable signaling with VI
+ * FMO: Override CPSR.F and enable signaling with VF
+ * SWIO: Turn set/way invalidates into set/way clean+invalidate
+ */
+#define HCR_GUEST_MASK (HCR_TSC | HCR_TSW | HCR_TWI | HCR_VM | HCR_BSU_IS | \
+ HCR_FB | HCR_TAC | HCR_AMO | HCR_IMO | HCR_FMO | \
+ HCR_SWIO)
+#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
+
+/* System Control Register (SCTLR) bits */
+#define SCTLR_TE (1 << 30)
+#define SCTLR_EE (1 << 25)
+#define SCTLR_V (1 << 13)
+
+/* Hyp System Control Register (HSCTLR) bits */
+#define HSCTLR_TE (1 << 30)
+#define HSCTLR_EE (1 << 25)
+#define HSCTLR_FI (1 << 21)
+#define HSCTLR_WXN (1 << 19)
+#define HSCTLR_I (1 << 12)
+#define HSCTLR_C (1 << 2)
+#define HSCTLR_A (1 << 1)
+#define HSCTLR_M 1
+#define HSCTLR_MASK (HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I | \
+ HSCTLR_WXN | HSCTLR_FI | HSCTLR_EE | HSCTLR_TE)
+
+/* TTBCR and HTCR Registers bits */
+#define TTBCR_EAE (1 << 31)
+#define TTBCR_IMP (1 << 30)
+#define TTBCR_SH1 (3 << 28)
+#define TTBCR_ORGN1 (3 << 26)
+#define TTBCR_IRGN1 (3 << 24)
+#define TTBCR_EPD1 (1 << 23)
+#define TTBCR_A1 (1 << 22)
+#define TTBCR_T1SZ (3 << 16)
+#define TTBCR_SH0 (3 << 12)
+#define TTBCR_ORGN0 (3 << 10)
+#define TTBCR_IRGN0 (3 << 8)
+#define TTBCR_EPD0 (1 << 7)
+#define TTBCR_T0SZ 3
+#define HTCR_MASK (TTBCR_T0SZ | TTBCR_IRGN0 | TTBCR_ORGN0 | TTBCR_SH0)
+
+/* Hyp System Trap Register */
+#define HSTR_T(x) (1 << x)
+#define HSTR_TTEE (1 << 16)
+#define HSTR_TJDBX (1 << 17)
+
+/* Hyp Coprocessor Trap Register */
+#define HCPTR_TCP(x) (1 << x)
+#define HCPTR_TCP_MASK (0x3fff)
+#define HCPTR_TASE (1 << 15)
+#define HCPTR_TTA (1 << 20)
+#define HCPTR_TCPAC (1 << 31)
+
+/* Virtualization Translation Control Register (VTCR) bits */
+#define VTCR_SH0 (3 << 12)
+#define VTCR_ORGN0 (3 << 10)
+#define VTCR_IRGN0 (3 << 8)
+#define VTCR_SL0 (3 << 6)
+#define VTCR_S (1 << 4)
+#define VTCR_T0SZ 3
+#define VTCR_MASK (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0 | VTCR_SL0 | \
+ VTCR_S | VTCR_T0SZ)
+#define VTCR_HTCR_SH (VTCR_SH0 | VTCR_ORGN0 | VTCR_IRGN0)
+#define VTCR_SL_L2 0 /* Starting-level: 2 */
+#define VTCR_SL_L1 (1 << 6) /* Starting-level: 1 */
+#define VTCR_GUEST_SL VTCR_SL_L1
+#define VTCR_GUEST_T0SZ 0
+#if VTCR_GUEST_SL == 0
+#define VTTBR_X (14 - VTCR_GUEST_T0SZ)
+#else
+#define VTTBR_X (5 - VTCR_GUEST_T0SZ)
+#endif
+
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT (26)
+#define HSR_EC (0x3fU << HSR_EC_SHIFT)
+#define HSR_IL (1U << 25)
+#define HSR_ISS (HSR_IL - 1)
+#define HSR_ISV_SHIFT (24)
+#define HSR_ISV (1U << HSR_ISV_SHIFT)
+#define HSR_SRT_SHIFT (16)
+#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT)
+#define HSR_FSC (0x3f)
+#define HSR_FSC_TYPE (0x3c)
+#define HSR_SSE (1 << 21)
+#define HSR_WNR (1 << 6)
+
+#define FSC_FAULT (0x04)
+#define FSC_PERM (0x0c)
+
+/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
+#define HPFAR_MASK (~0xf)
+
+#define HSR_EC_UNKNOWN (0x00)
+#define HSR_EC_WFI (0x01)
+#define HSR_EC_CP15_32 (0x03)
+#define HSR_EC_CP15_64 (0x04)
+#define HSR_EC_CP14_MR (0x05)
+#define HSR_EC_CP14_LS (0x06)
+#define HSR_EC_CP_0_13 (0x07)
+#define HSR_EC_CP10_ID (0x08)
+#define HSR_EC_JAZELLE (0x09)
+#define HSR_EC_BXJ (0x0A)
+#define HSR_EC_CP14_64 (0x0C)
+#define HSR_EC_SVC_HYP (0x11)
+#define HSR_EC_HVC (0x12)
+#define HSR_EC_SMC (0x13)
+#define HSR_EC_IABT (0x20)
+#define HSR_EC_IABT_HYP (0x21)
+#define HSR_EC_DABT (0x24)
+#define HSR_EC_DABT_HYP (0x25)
+
+#endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
new file mode 100644
index 0000000000000..e01dfabacdd26
--- /dev/null
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_ASM_H__
+#define __ARM_KVM_ASM_H__
+
+#define ARM_EXCEPTION_RESET 0
+#define ARM_EXCEPTION_UNDEFINED 1
+#define ARM_EXCEPTION_SOFTWARE 2
+#define ARM_EXCEPTION_PREF_ABORT 3
+#define ARM_EXCEPTION_DATA_ABORT 4
+#define ARM_EXCEPTION_IRQ 5
+#define ARM_EXCEPTION_FIQ 6
+#define ARM_EXCEPTION_HVC 7
+
+/*
+ * SMC Hypervisor API call number
+ */
+#define SMCHYP_HVBAR_W 0xfffffff0
+
+#ifndef __ASSEMBLY__
+struct kvm;
+struct kvm_vcpu;
+
+extern char __kvm_hyp_init[];
+extern char __kvm_hyp_init_end[];
+
+extern char __kvm_hyp_exit[];
+extern char __kvm_hyp_exit_end[];
+
+extern char __kvm_hyp_vector[];
+
+extern char __kvm_hyp_code_start[];
+extern char __kvm_hyp_code_end[];
+
+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
+extern void __kvm_flush_vm_context(void);
+
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
+#endif
+
+#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
new file mode 100644
index 0000000000000..c41537b972e0a
--- /dev/null
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_EMULATE_H__
+#define __ARM_KVM_EMULATE_H__
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_asm.h>
+
+u32 *vcpu_reg_mode(struct kvm_vcpu *vcpu, u8 reg_num, enum vcpu_mode mode);
+
+static inline u8 __vcpu_mode(u32 cpsr)
+{
+ static const u8 modes_table[32] = {
+ 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+ 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
+ MODE_USR, /* 0x0 */
+ MODE_FIQ, /* 0x1 */
+ MODE_IRQ, /* 0x2 */
+ MODE_SVC, /* 0x3 */
+ 0xf, 0xf, 0xf,
+ MODE_ABT, /* 0x7 */
+ 0xf, 0xf, 0xf,
+ MODE_UND, /* 0xb */
+ 0xf, 0xf, 0xf,
+ MODE_SYS /* 0xf */
+ };
+
+ return modes_table[cpsr & 0x1f];
+}
+
+static inline enum vcpu_mode vcpu_mode(struct kvm_vcpu *vcpu)
+{
+ u8 mode = __vcpu_mode(vcpu->arch.regs.cpsr);
+ BUG_ON(mode == 0xf);
+ return mode;
+}
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ unsigned long instr);
+void kvm_adjust_itstate(struct kvm_vcpu *vcpu);
+void kvm_inject_undefined(struct kvm_vcpu *vcpu);
+
+/*
+ * Return the SPSR for the specified mode of the virtual CPU.
+ */
+static inline u32 *vcpu_spsr_mode(struct kvm_vcpu *vcpu, enum vcpu_mode mode)
+{
+ switch (mode) {
+ case MODE_SVC:
+ return &vcpu->arch.regs.svc_regs[2];
+ case MODE_ABT:
+ return &vcpu->arch.regs.abt_regs[2];
+ case MODE_UND:
+ return &vcpu->arch.regs.und_regs[2];
+ case MODE_IRQ:
+ return &vcpu->arch.regs.irq_regs[2];
+ case MODE_FIQ:
+ return &vcpu->arch.regs.fiq_regs[7];
+ default:
+ BUG();
+ }
+}
+
+/* Get vcpu register for current mode */
+static inline u32 *vcpu_reg(struct kvm_vcpu *vcpu, unsigned long reg_num)
+{
+ return vcpu_reg_mode(vcpu, reg_num, vcpu_mode(vcpu));
+}
+
+static inline u32 *vcpu_pc(struct kvm_vcpu *vcpu)
+{
+ return vcpu_reg(vcpu, 15);
+}
+
+static inline u32 *vcpu_cpsr(struct kvm_vcpu *vcpu)
+{
+ return &vcpu->arch.regs.cpsr;
+}
+
+/* Get vcpu SPSR for current mode */
+static inline u32 *vcpu_spsr(struct kvm_vcpu *vcpu)
+{
+ return vcpu_spsr_mode(vcpu, vcpu_mode(vcpu));
+}
+
+static inline bool mode_has_spsr(struct kvm_vcpu *vcpu)
+{
+ return (vcpu_mode(vcpu) < MODE_USR);
+}
+
+static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu)
+{
+ BUG_ON(vcpu_mode(vcpu) > MODE_SYS);
+ return vcpu_mode(vcpu) != MODE_USR;
+}
+
+#endif /* __ARM_KVM_EMULATE_H__ */
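
As a usage sketch of the accessors above (illustrative only; for instance,
this is roughly how MMIO emulation code could complete a 32-bit load and step
past the trapped instruction):

	static void complete_word_load(struct kvm_vcpu *vcpu,
				       unsigned long rd, u32 data)
	{
		*vcpu_reg(vcpu, rd) = data;	/* dest. reg in current mode */
		*vcpu_pc(vcpu) += 4;		/* skip the 4-byte ARM insn;
						 * a Thumb encoding adds 2 */
	}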
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
new file mode 100644
index 0000000000000..0c7e782b2e97e
--- /dev/null
+++ b/arch/arm/include/asm/kvm_host.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_HOST_H__
+#define __ARM_KVM_HOST_H__
+
+#define KVM_MAX_VCPUS 4
+#define KVM_MEMORY_SLOTS 32
+#define KVM_PRIVATE_MEM_SLOTS 4
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+/* We don't currently support large pages. */
+#define KVM_HPAGE_GFN_SHIFT(x) 0
+#define KVM_NR_PAGE_SIZES 1
+#define KVM_PAGES_PER_HPAGE(x) (1UL<<31)
+
+struct kvm_vcpu;
+u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
+int kvm_target_cpu(void);
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
+
+struct kvm_arch {
+ /* The VMID generation used for the virt. memory system */
+ u64 vmid_gen;
+ u32 vmid;
+
+ /* 1-level 2nd stage table and lock */
+ struct mutex pgd_mutex;
+ pgd_t *pgd;
+
+ /* VTTBR value associated with above pgd and vmid */
+ u64 vttbr;
+};
+
+#define EXCEPTION_NONE 0
+#define EXCEPTION_RESET 0x80
+#define EXCEPTION_UNDEFINED 0x40
+#define EXCEPTION_SOFTWARE 0x20
+#define EXCEPTION_PREFETCH 0x10
+#define EXCEPTION_DATA 0x08
+#define EXCEPTION_IMPRECISE 0x04
+#define EXCEPTION_IRQ 0x02
+#define EXCEPTION_FIQ 0x01
+
+struct kvm_vcpu_regs {
+ u32 usr_regs[15]; /* R0_usr - R14_usr */
+ u32 svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
+ u32 abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
+ u32 und_regs[3]; /* SP_und, LR_und, SPSR_und */
+ u32 irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
+ u32 fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
+ u32 pc; /* The program counter (r15) */
+ u32 cpsr; /* The guest CPSR */
+} __packed;
+
+enum cp15_regs {
+ c0_MIDR, /* Main ID Register */
+ c0_MPIDR, /* MultiProcessor ID Register */
+ c1_SCTLR, /* System Control Register */
+ c1_ACTLR, /* Auxiliary Control Register */
+ c1_CPACR, /* Coprocessor Access Control */
+ c2_TTBR0, /* Translation Table Base Register 0 */
+ c2_TTBR0_high, /* TTBR0 top 32 bits */
+ c2_TTBR1, /* Translation Table Base Register 1 */
+ c2_TTBR1_high, /* TTBR1 top 32 bits */
+ c2_TTBCR, /* Translation Table Base Control R. */
+ c3_DACR, /* Domain Access Control Register */
+ c5_DFSR, /* Data Fault Status Register */
+ c5_IFSR, /* Instruction Fault Status Register */
+ c5_ADFSR, /* Auxiliary Data Fault Status Register */
+ c5_AIFSR, /* Auxiliary Instruction Fault Status Register */
+ c6_DFAR, /* Data Fault Address Register */
+ c6_IFAR, /* Instruction Fault Address Register */
+ c10_PRRR, /* Primary Region Remap Register */
+ c10_NMRR, /* Normal Memory Remap Register */
+ c12_VBAR, /* Vector Base Address Register */
+ c13_CID, /* Context ID Register */
+ c13_TID_URW, /* Thread ID, User R/W */
+ c13_TID_URO, /* Thread ID, User R/O */
+ c13_TID_PRIV, /* Thread ID, Privileged */
+
+ nr_cp15_regs
+};
+
+struct kvm_vcpu_arch {
+ struct kvm_vcpu_regs regs;
+
+ /* System control coprocessor (cp15) */
+ u32 cp15[nr_cp15_regs];
+
+ /* Exception Information */
+ u32 hsr; /* Hyp Syndrome Register */
+ u32 hdfar; /* Hyp Data Fault Address Register */
+ u32 hifar; /* Hyp Inst. Fault Address Register */
+ u32 hpfar; /* Hyp IPA Fault Address Register */
+ u64 pc_ipa; /* IPA for the current PC (VA to PA result) */
+ u64 pc_ipa2; /* same as above, but for non-aligned wide thumb
+ instructions */
+
+ /* dcache set/way operation pending */
+ cpumask_t require_dcache_flush;
+
+ /* IO related fields */
+ bool mmio_sign_extend; /* for byte/halfword loads */
+ u32 mmio_rd;
+
+ /* Interrupt related fields */
+ u32 irq_lines; /* IRQ and FIQ levels */
+ u32 wait_for_interrupts;
+
+ /* Hyp exception information */
+ u32 hyp_pc; /* PC when exception was taken from Hyp mode */
+};
+
+struct kvm_vm_stat {
+ u32 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat {
+ u32 exits;
+ u32 mmio_exits;
+ u32 kmmio_exits;
+ u32 wfi_exits;
+ u32 irq_exits;
+ u32 halt_wakeup;
+};
+
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ return 0;
+}
+
+#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
new file mode 100644
index 0000000000000..7ccd259f85d31
--- /dev/null
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_MMU_H__
+#define __ARM_KVM_MMU_H__
+
+/*
+ * The architecture supports 40-bit IPA as input to the 2nd stage translations
+ * and PTRS_PER_PGD2 could therefore be 1024.
+ *
+ * To save a bit of memory and to avoid alignment issues we assume 39-bit IPA
+ * for now, but remember that the level-1 table must be aligned to its size.
+ */
+#define PTRS_PER_PGD2 512
+#define PGD2_ORDER get_order(PTRS_PER_PGD2 * sizeof(pgd_t))
+
+int create_hyp_mappings(void *from, void *to);
+void free_hyp_pmds(void);
+
+int kvm_alloc_stage2_pgd(struct kvm *kvm);
+void kvm_free_stage2_pgd(struct kvm *kvm);
+
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
+
+#endif /* __ARM_KVM_MMU_H__ */
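
For a feel of the sizes involved: under LPAE, PTRS_PER_PGD2 * sizeof(pgd_t)
is 512 * 8 bytes = 4 KB, so a PGD2_ORDER allocation is a single page and is
naturally aligned to the table size, as required above. A minimal allocation
sketch (an assumption, not necessarily the mmu.c implementation):

	pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);

	if (!pgd)
		return -ENOMEM;
	memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
	kvm->arch.pgd = pgd;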
diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h
index a6efcdd6fd251..3787c9fdc6e67 100644
--- a/arch/arm/include/asm/mach/map.h
+++ b/arch/arm/include/asm/mach/map.h
@@ -37,6 +37,7 @@ extern void iotable_init(struct map_desc *, int);
struct mem_type;
extern const struct mem_type *get_mem_type(unsigned int type);
+extern pteval_t get_mem_type_prot_pte(unsigned int type);
/*
* external interface to remap single page with appropriate type
*/
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index d7952824c5c4e..18f5cef82ad58 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -32,6 +32,9 @@
#define PMD_TYPE_SECT (_AT(pmdval_t, 1) << 0)
#define PMD_BIT4 (_AT(pmdval_t, 0))
#define PMD_DOMAIN(x) (_AT(pmdval_t, 0))
+#define PMD_APTABLE_SHIFT (61)
+#define PMD_APTABLE (_AT(pgdval_t, 3) << PMD_APTABLE_SHIFT)
+#define PMD_PXNTABLE (_AT(pgdval_t, 1) << 59)
/*
* - section
@@ -41,9 +44,11 @@
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53)
#define PMD_SECT_XN (_AT(pmdval_t, 1) << 54)
#define PMD_SECT_AP_WRITE (_AT(pmdval_t, 0))
#define PMD_SECT_AP_READ (_AT(pmdval_t, 0))
+#define PMD_SECT_AP1 (_AT(pmdval_t, 1) << 6)
#define PMD_SECT_TEX(x) (_AT(pmdval_t, 0))
/*
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index b24903549d1c1..7351eee5e5473 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -102,11 +102,24 @@
*/
#define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_SHARED L_PTE_SHARED
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
#ifndef __ASSEMBLY__
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & 2))
#define pud_present(pud) (pud_val(pud))
+#define pmd_table(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_TABLE)
+#define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \
+ PMD_TYPE_SECT)
#define pud_clear(pudp) \
do { \
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f66626d71e7d1..a31d0e996ec45 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -70,6 +70,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
extern pgprot_t pgprot_user;
extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
#define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
@@ -82,6 +83,10 @@ extern pgprot_t pgprot_kernel;
#define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY)
#define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
#define PAGE_KERNEL_EXEC pgprot_kernel
+#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+ L_PTE2_NORM_WB | L_PTE2_INNER_WB | \
+ L_PTE2_SHARED)
#define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
#define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h
index f5989f46b4d2d..e5dc5f6793d78 100644
--- a/arch/arm/include/asm/unified.h
+++ b/arch/arm/include/asm/unified.h
@@ -54,6 +54,18 @@
#endif /* CONFIG_THUMB2_KERNEL */
+#ifdef CONFIG_KVM_ARM_HOST
+#ifdef __ASSEMBLY__
+.arch_extension sec
+.arch_extension virt
+#else
+__asm__(
+" .arch_extension sec\n"
+" .arch_extension virt\n"
+);
+#endif
+#endif
+
#ifndef CONFIG_ARM_ASM_UNIFIED
/*
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index b57c75e0b01f9..38d3a122dbd8d 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -48,6 +48,13 @@ extern void __aeabi_ulcmp(void);
extern void fpundefinstr(void);
+#ifdef CONFIG_KVM_ARM_HOST
+/* This is needed for KVM */
+extern void __irq_svc(void);
+
+EXPORT_SYMBOL_GPL(__irq_svc);
+#endif
+
/* platform dependent support */
EXPORT_SYMBOL(__udelay);
EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 1429d8989fb90..9c76b538a14ba 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
+#include <linux/kvm_host.h>
#include <asm/cacheflush.h>
#include <asm/glue-df.h>
#include <asm/glue-pf.h>
@@ -144,5 +145,47 @@ int main(void)
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
+#ifdef CONFIG_KVM_ARM_HOST
+ DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
+ DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.cp15[c0_MIDR]));
+ DEFINE(VCPU_MPIDR, offsetof(struct kvm_vcpu, arch.cp15[c0_MPIDR]));
+ DEFINE(VCPU_SCTLR, offsetof(struct kvm_vcpu, arch.cp15[c1_SCTLR]));
+ DEFINE(VCPU_CPACR, offsetof(struct kvm_vcpu, arch.cp15[c1_CPACR]));
+ DEFINE(VCPU_TTBR0, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR0]));
+ DEFINE(VCPU_TTBR1, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBR1]));
+ DEFINE(VCPU_TTBCR, offsetof(struct kvm_vcpu, arch.cp15[c2_TTBCR]));
+ DEFINE(VCPU_DACR, offsetof(struct kvm_vcpu, arch.cp15[c3_DACR]));
+ DEFINE(VCPU_DFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_DFSR]));
+ DEFINE(VCPU_IFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_IFSR]));
+ DEFINE(VCPU_ADFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_ADFSR]));
+ DEFINE(VCPU_AIFSR, offsetof(struct kvm_vcpu, arch.cp15[c5_AIFSR]));
+ DEFINE(VCPU_DFAR, offsetof(struct kvm_vcpu, arch.cp15[c6_DFAR]));
+ DEFINE(VCPU_IFAR, offsetof(struct kvm_vcpu, arch.cp15[c6_IFAR]));
+ DEFINE(VCPU_PRRR, offsetof(struct kvm_vcpu, arch.cp15[c10_PRRR]));
+ DEFINE(VCPU_NMRR, offsetof(struct kvm_vcpu, arch.cp15[c10_NMRR]));
+ DEFINE(VCPU_VBAR, offsetof(struct kvm_vcpu, arch.cp15[c12_VBAR]));
+ DEFINE(VCPU_CID, offsetof(struct kvm_vcpu, arch.cp15[c13_CID]));
+ DEFINE(VCPU_TID_URW, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URW]));
+ DEFINE(VCPU_TID_URO, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_URO]));
+ DEFINE(VCPU_TID_PRIV, offsetof(struct kvm_vcpu, arch.cp15[c13_TID_PRIV]));
+ DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs));
+ DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+ DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+ DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+ DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs));
+ DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+ DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+ DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc));
+ DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr));
+ DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
+ DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr));
+ DEFINE(VCPU_HDFAR, offsetof(struct kvm_vcpu, arch.hdfar));
+ DEFINE(VCPU_HIFAR, offsetof(struct kvm_vcpu, arch.hifar));
+ DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar));
+ DEFINE(VCPU_PC_IPA, offsetof(struct kvm_vcpu, arch.pc_ipa));
+ DEFINE(VCPU_PC_IPA2, offsetof(struct kvm_vcpu, arch.pc_ipa2));
+ DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.hyp_pc));
+ DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
+#endif
return 0;
}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 0d1851ca6eb99..1c4937a220b7c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -209,6 +209,7 @@ __dabt_svc:
ENDPROC(__dabt_svc)
.align 5
+ .globl __irq_svc
__irq_svc:
svc_entry
irq_handler
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb06318d..33da40aac39ab 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -19,7 +19,11 @@
ALIGN_FUNCTION(); \
VMLINUX_SYMBOL(__idmap_text_start) = .; \
*(.idmap.text) \
- VMLINUX_SYMBOL(__idmap_text_end) = .;
+ VMLINUX_SYMBOL(__idmap_text_end) = .; \
+ ALIGN_FUNCTION(); \
+ VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \
+ *(.hyp.idmap.text) \
+ VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
#ifdef CONFIG_HOTPLUG_CPU
#define ARM_CPU_DISCARD(x)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
new file mode 100644
index 0000000000000..7fa50d3df6294
--- /dev/null
+++ b/arch/arm/kvm/Kconfig
@@ -0,0 +1,45 @@
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ ---help---
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ select PREEMPT_NOTIFIERS
+ select ANON_INODES
+ select KVM_MMIO
+ depends on CPU_V7 && ARM_VIRT_EXT
+ ---help---
+ Support hosting virtualized guest machines. You will also
+ need to select one or more of the processor modules below.
+
+ This module provides access to the hardware capabilities through
+ a character device node named /dev/kvm.
+
+ If unsure, say N.
+
+config KVM_ARM_HOST
+ bool "KVM host support for ARM CPUs"
+ depends on KVM
+ depends on MMU
+ depends on CPU_V7 && ARM_VIRT_EXT
+ select MMU_NOTIFIER
+ ---help---
+ Provides host support for ARM processors.
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
new file mode 100644
index 0000000000000..5f40c5a5a2bda
--- /dev/null
+++ b/arch/arm/kvm/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+CFLAGS_arm.o := -I.
+CFLAGS_mmu.o := -I.
+
+AFLAGS_interrupts.o := -I$(obj)
+
+obj-$(CONFIG_KVM_ARM_HOST) += init.o interrupts.o exports.o
+
+kvm-arm-y += $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+
+kvm-arm-y += arm.o guest.o mmu.o emulate.o reset.o
+
+obj-$(CONFIG_KVM) += kvm-arm.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
new file mode 100644
index 0000000000000..f3b206ac1fe55
--- /dev/null
+++ b/arch/arm/kvm/arm.c
@@ -0,0 +1,881 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/mman.h>
+#include <linux/sched.h>
+#include <linux/kvm.h>
+#include <trace/events/kvm.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#include <asm/cputype.h>
+#include <asm/unified.h>
+#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/mman.h>
+#include <asm/idmap.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
+
+static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+
+/* The VMID used in the VTTBR */
+static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
+static u8 kvm_next_vmid;
+DEFINE_SPINLOCK(kvm_vmid_lock);
+
+int kvm_arch_hardware_enable(void *garbage)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return 1;
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+ return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+ *(int *)rtn = 0;
+}
+
+void kvm_arch_sync_events(struct kvm *kvm)
+{
+}
+
+/**
+ * kvm_arch_init_vm - initializes a VM data structure
+ * @kvm: pointer to the KVM struct
+ */
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ int ret = 0;
+
+ if (type)
+ return -EINVAL;
+
+ ret = kvm_alloc_stage2_pgd(kvm);
+ if (ret)
+ goto out_fail_alloc;
+ mutex_init(&kvm->arch.pgd_mutex);
+
+ ret = create_hyp_mappings(kvm, kvm + 1);
+ if (ret)
+ goto out_free_stage2_pgd;
+
+ /* Mark the initial VMID generation invalid */
+ kvm->arch.vmid_gen = 0;
+
+ return ret;
+out_free_stage2_pgd:
+ kvm_free_stage2_pgd(kvm);
+out_fail_alloc:
+ return ret;
+}
+
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+}
+
+int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+{
+ return 0;
+}
+
+/**
+ * kvm_arch_destroy_vm - destroy the VM data structure
+ * @kvm: pointer to the KVM struct
+ */
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ int i;
+
+ kvm_free_stage2_pgd(kvm);
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (kvm->vcpus[i]) {
+ kvm_arch_vcpu_free(kvm->vcpus[i]);
+ kvm->vcpus[i] = NULL;
+ }
+ }
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+ int r;
+ switch (ext) {
+ case KVM_CAP_USER_MEMORY:
+ case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
+ r = 1;
+ break;
+ case KVM_CAP_COALESCED_MMIO:
+ r = KVM_COALESCED_MMIO_PAGE_OFFSET;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+ return r;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return 0;
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+}
+
+void kvm_arch_flush_shadow(struct kvm *kvm)
+{
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+ int err;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+ if (!vcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = kvm_vcpu_init(vcpu, kvm, id);
+ if (err)
+ goto free_vcpu;
+
+ err = create_hyp_mappings(vcpu, vcpu + 1);
+ if (err)
+ goto vcpu_uninit;
+
+ return vcpu;
+vcpu_uninit:
+ kvm_vcpu_uninit(vcpu);
+free_vcpu:
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+ return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+ kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int __attribute_const__ kvm_target_cpu(void)
+{
+ unsigned int midr;
+
+ midr = read_cpuid_id();
+ switch ((midr >> 4) & 0xfff) {
+ case CORTEX_A15:
+ return CORTEX_A15;
+ default:
+ return -EINVAL;
+ }
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ ret = kvm_reset_vcpu(vcpu);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ vcpu->cpu = cpu;
+
+ /*
+ * Check whether this vcpu requires the cache to be flushed on
+ * this physical CPU. This is a consequence of doing dcache
+ * operations by set/way on this vcpu. We do it here in order
+ * to be in a non-preemptible section.
+ */
+ if (cpumask_test_and_clear_cpu(cpu, &vcpu->arch.require_dcache_flush))
+ flush_cache_all(); /* We'd really want v7_flush_dcache_all() */
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ return -EINVAL;
+}
+
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ return -EINVAL;
+}
+
+/**
+ * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled
+ * @v: The VCPU pointer
+ *
+ * If the guest CPU is not waiting for interrupts (or waiting and
+ * an interrupt is pending) then it is by definition runnable.
+ */
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+ return !!v->arch.irq_lines ||
+ !v->arch.wait_for_interrupts;
+}
+
+int kvm_arch_vcpu_in_guest_mode(struct kvm_vcpu *v)
+{
+ return v->mode == IN_GUEST_MODE;
+}
+
+static void reset_vm_context(void *info)
+{
+ __kvm_flush_vm_context();
+}
+
+/**
+ * check_new_vmid_gen - check that the VMID is still valid
+ * @kvm: The VM whose VMID to check
+ *
+ * Returns true if there is a new generation of VMIDs being used
+ *
+ * The hardware supports only 256 values, with the value zero reserved for the
+ * host, so we check if an assigned value belongs to a previous generation,
+ * which requires us to assign a new value. If we're the first to use a
+ * VMID for the new generation, we must flush necessary caches and TLBs on all
+ * CPUs.
+ */
+static bool check_new_vmid_gen(struct kvm *kvm)
+{
+ return unlikely(kvm->arch.vmid_gen != atomic64_read(&kvm_vmid_gen));
+}
+
+/**
+ * update_vttbr - Update the VTTBR with a valid VMID before the guest runs
+ * @kvm: The guest that we are about to run
+ *
+ * Called from kvm_arch_vcpu_ioctl_run before entering the guest to ensure the
+ * VM has a valid VMID, otherwise assigns a new one and flushes corresponding
+ * caches and TLBs.
+ */
+static void update_vttbr(struct kvm *kvm)
+{
+ phys_addr_t pgd_phys;
+
+ if (!check_new_vmid_gen(kvm))
+ return;
+
+ spin_lock(&kvm_vmid_lock);
+
+ /* First user of a new VMID generation? */
+ if (unlikely(kvm_next_vmid == 0)) {
+ atomic64_inc(&kvm_vmid_gen);
+ kvm_next_vmid = 1;
+
+ /* This does nothing on UP */
+ smp_call_function(reset_vm_context, NULL, 1);
+
+ /*
+ * On SMP we know no other CPUs can use this CPU's or
+ * each other's VMID since the kvm_vmid_lock blocks
+ * them from reentry to the guest.
+ */
+
+ reset_vm_context(NULL);
+ }
+
+ kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen);
+ kvm->arch.vmid = kvm_next_vmid;
+ kvm_next_vmid++;
+
+ /* update vttbr to be used with the new vmid */
+ pgd_phys = virt_to_phys(kvm->arch.pgd);
+ kvm->arch.vttbr = pgd_phys & ((1LLU << 40) - 1)
+ & ~((2 << VTTBR_X) - 1);
+ kvm->arch.vttbr |= (u64)(kvm->arch.vmid) << 48;
+
+ spin_unlock(&kvm_vmid_lock);
+}
+
+static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* SVC called from Hyp mode should never get here */
+ kvm_debug("SVC called from Hyp mode shouldn't go here\n");
+ BUG();
+ return -EINVAL; /* Squash warning */
+}
+
+static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /*
+ * Guest called HVC instruction:
+ * Let it know we don't want that by injecting an undefined exception.
+ */
+ kvm_debug("hvc: %x (at %08x)", vcpu->arch.hsr & ((1 << 16) - 1),
+ vcpu->arch.regs.pc);
+ kvm_debug(" HSR: %8x", vcpu->arch.hsr);
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
+
+static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* We don't support SMC; don't do that. */
+ kvm_debug("smc: at %08x", vcpu->arch.regs.pc);
+ return -EINVAL;
+}
+
+static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* The hypervisor should never cause aborts */
+ kvm_err("Prefetch Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
+ vcpu->arch.hifar, vcpu->arch.hsr);
+ return -EFAULT;
+}
+
+static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ /* This is either an error in the world-switch code or an external abort */
+ kvm_err("Data Abort taken from Hyp mode at %#08x (HSR: %#08x)\n",
+ vcpu->arch.hdfar, vcpu->arch.hsr);
+ return -EFAULT;
+}
+
+typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+static exit_handle_fn arm_exit_handlers[] = {
+ [HSR_EC_WFI] = kvm_handle_wfi,
+ [HSR_EC_CP15_32] = kvm_handle_cp15_32,
+ [HSR_EC_CP15_64] = kvm_handle_cp15_64,
+ [HSR_EC_CP14_MR] = kvm_handle_cp14_access,
+ [HSR_EC_CP14_LS] = kvm_handle_cp14_load_store,
+ [HSR_EC_CP14_64] = kvm_handle_cp14_access,
+ [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access,
+ [HSR_EC_CP10_ID] = kvm_handle_cp10_id,
+ [HSR_EC_SVC_HYP] = handle_svc_hyp,
+ [HSR_EC_HVC] = handle_hvc,
+ [HSR_EC_SMC] = handle_smc,
+ [HSR_EC_IABT] = kvm_handle_guest_abort,
+ [HSR_EC_IABT_HYP] = handle_pabt_hyp,
+ [HSR_EC_DABT] = kvm_handle_guest_abort,
+ [HSR_EC_DABT_HYP] = handle_dabt_hyp,
+};
+
+/*
+ * Return 0 to return to guest, < 0 on error, exit_reason ( > 0) on proper
+ * exit to QEMU.
+ */
+static int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ unsigned long hsr_ec;
+
+ switch (exception_index) {
+ case ARM_EXCEPTION_IRQ:
+ vcpu->stat.irq_exits++;
+ return 0;
+ case ARM_EXCEPTION_UNDEFINED:
+ kvm_err("Undefined exception in Hyp mode at: %#08x\n",
+ vcpu->arch.hyp_pc);
+ BUG();
+ panic("KVM: Hypervisor undefined exception!\n");
+ case ARM_EXCEPTION_DATA_ABORT:
+ case ARM_EXCEPTION_PREF_ABORT:
+ case ARM_EXCEPTION_HVC:
+ hsr_ec = (vcpu->arch.hsr & HSR_EC) >> HSR_EC_SHIFT;
+
+ if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers)
+ || !arm_exit_handlers[hsr_ec]) {
+ kvm_err("Unknown exception class: %#08lx, "
+ "hsr: %#08x\n", hsr_ec,
+ (unsigned int)vcpu->arch.hsr);
+ BUG();
+ }
+
+ return arm_exit_handlers[hsr_ec](vcpu, run);
+ default:
+ kvm_pr_unimpl("Unsupported exception type: %d",
+ exception_index);
+ return -EINVAL;
+ }
+}
+
+/*
+ * Return 0 to proceed with guest entry
+ */
+static int vcpu_pre_guest_enter(struct kvm_vcpu *vcpu, int *exit_reason)
+{
+ if (signal_pending(current)) {
+ *exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+ if (check_new_vmid_gen(vcpu->kvm))
+ return 1;
+
+ BUG_ON(__vcpu_mode(*vcpu_cpsr(vcpu)) == 0xf);
+
+ return 0;
+}
+
+/**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run structure pointer used for userspace state exchange
+ *
+ * This function is called through the KVM_RUN ioctl called from user space. It
+ * will execute VM code in a loop until the time slice for the process is used
+ * up or some emulation is needed from user space, in which case the function
+ * will return with return value 0 and with the kvm_run structure filled in
+ * with the required data for the requested emulation.
+ */
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int ret = 0;
+ int exit_reason;
+ sigset_t sigsaved;
+
+ if (run->exit_reason == KVM_EXIT_MMIO) {
+ ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+ if (ret)
+ return ret;
+ }
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+ exit_reason = KVM_EXIT_UNKNOWN;
+ while (exit_reason == KVM_EXIT_UNKNOWN) {
+ /*
+ * Check conditions before entering the guest
+ */
+ cond_resched();
+
+ if (vcpu->arch.wait_for_interrupts)
+ kvm_vcpu_block(vcpu);
+
+ update_vttbr(vcpu->kvm);
+
+ /*
+ * Make sure preemption is disabled while calling handle_exit
+ * as exit handling touches CPU-specific resources, such as
+ * caches, and we must stay on the same CPU.
+ *
+ * Code that might sleep must disable preemption and access
+ * CPU-specific resources first.
+ */
+ preempt_disable();
+ local_irq_disable();
+
+ /* Re-check atomic conditions */
+ ret = vcpu_pre_guest_enter(vcpu, &exit_reason);
+ if (ret != 0) {
+ local_irq_enable();
+ preempt_enable();
+ continue;
+ }
+
+ /**************************************************************
+ * Enter the guest
+ */
+ trace_kvm_entry(vcpu->arch.regs.pc);
+ kvm_guest_enter();
+ vcpu->mode = IN_GUEST_MODE;
+
+ ret = __kvm_vcpu_run(vcpu);
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ vcpu->stat.exits++;
+ kvm_guest_exit();
+ trace_kvm_exit(vcpu->arch.regs.pc);
+ local_irq_enable();
+
+ /*
+ * Back from guest
+ *************************************************************/
+
+ ret = handle_exit(vcpu, run, ret);
+ preempt_enable();
+ if (ret < 0) {
+ kvm_err("Error in handle_exit\n");
+ break;
+ } else {
+ exit_reason = ret; /* 0 == KVM_EXIT_UNKNOWN */
+ }
+ }
+
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+ run->exit_reason = exit_reason;
+ return ret;
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level)
+{
+ unsigned int vcpu_idx;
+ struct kvm_vcpu *vcpu;
+ unsigned long *ptr;
+ bool set;
+ int bit_nr;
+
+ vcpu_idx = irq_level->irq >> 1;
+ if (vcpu_idx >= KVM_MAX_VCPUS)
+ return -EINVAL;
+
+ vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+ if (!vcpu)
+ return -EINVAL;
+
+ trace_kvm_set_irq(irq_level->irq, irq_level->level, 0);
+
+ if ((irq_level->irq & 1) == KVM_ARM_IRQ_LINE)
+ bit_nr = HCR_VI_BIT_NR;
+ else /* KVM_ARM_FIQ_LINE */
+ bit_nr = HCR_VF_BIT_NR;
+
+ ptr = (unsigned long *)&vcpu->arch.irq_lines;
+ if (irq_level->level)
+ set = test_and_set_bit(bit_nr, ptr);
+ else
+ set = test_and_clear_bit(bit_nr, ptr);
+
+ /*
+ * If we didn't change anything, no need to wake up or kick other CPUs
+ */
+ if (!!set == !!irq_level->level)
+ return 0;
+
+ /*
+ * The vcpu irq_lines field was updated, wake up sleeping VCPUs and
+ * trigger a world-switch round on the running physical CPU to set the
+ * virtual IRQ/FIQ fields in the HCR appropriately.
+ */
+ if (irq_level->level)
+ vcpu->arch.wait_for_interrupts = 0;
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
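The irq field of kvm_irq_level is packed: bit 0 selects the line (IRQ vs FIQ) and the remaining bits carry the VCPU index. Assuming KVM_ARM_IRQ_LINE and KVM_ARM_FIQ_LINE are the 0/1 encodings implied by the test above, a userspace caller could build the argument like this (set_guest_irq() is a hypothetical helper):

    static int set_guest_irq(int vm_fd, unsigned int vcpu_idx,
                             bool fiq, bool level)
    {
            struct kvm_irq_level irq_level = {
                    .irq   = (vcpu_idx << 1) |
                             (fiq ? KVM_ARM_FIQ_LINE : KVM_ARM_IRQ_LINE),
                    .level = level,
            };

            return ioctl(vm_fd, KVM_IRQ_LINE, &irq_level);
    }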
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+ return -EINVAL;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -EINVAL;
+}
+
+static void cpu_set_vector(void *vector)
+{
+ unsigned long vector_ptr;
+ unsigned long smc_hyp_nr;
+
+ vector_ptr = (unsigned long)vector;
+ smc_hyp_nr = SMCHYP_HVBAR_W;
+
+ /*
+ * Set the HVBAR
+ */
+ asm volatile (
+ "mov r0, %[vector_ptr]\n\t"
+ "mov r7, %[smc_hyp_nr]\n\t"
+ "smc #0\n\t" : :
+ [vector_ptr] "r" (vector_ptr),
+ [smc_hyp_nr] "r" (smc_hyp_nr) :
+ "r0", "r7");
+}
+
+static void cpu_init_hyp_mode(void *vector)
+{
+ unsigned long pgd_ptr;
+ unsigned long hyp_stack_ptr;
+ unsigned long stack_page;
+
+ cpu_set_vector(vector);
+
+ pgd_ptr = virt_to_phys(hyp_pgd);
+ stack_page = __get_cpu_var(kvm_arm_hyp_stack_page);
+ hyp_stack_ptr = stack_page + PAGE_SIZE;
+
+ /*
+ * Call initialization code
+ */
+ asm volatile (
+ "mov r0, %[pgd_ptr]\n\t"
+ "mov r1, %[hyp_stack_ptr]\n\t"
+ "hvc #0\n\t" : :
+ [pgd_ptr] "r" (pgd_ptr),
+ [hyp_stack_ptr] "r" (hyp_stack_ptr) :
+ "r0", "r1");
+}
+
+/**
+ * init_hyp_mode - initialize Hyp-mode on all online CPUs
+ */
+static int init_hyp_mode(void)
+{
+ phys_addr_t init_phys_addr;
+ int cpu;
+ int err = 0;
+
+ /*
+ * Allocate stack pages for Hypervisor-mode
+ */
+ for_each_possible_cpu(cpu) {
+ unsigned long stack_page;
+
+ stack_page = __get_free_page(GFP_KERNEL);
+ if (!stack_page) {
+ err = -ENOMEM;
+ goto out_free_stack_pages;
+ }
+
+ per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page;
+ }
+
+ /*
+ * Execute the init code on each CPU.
+ *
+ * Note: The stack is not mapped yet, so don't do anything else than
+ * initializing the hypervisor mode on each CPU using a local stack
+ * space for temporary storage.
+ */
+ init_phys_addr = virt_to_phys(__kvm_hyp_init);
+ for_each_online_cpu(cpu) {
+ smp_call_function_single(cpu, cpu_init_hyp_mode,
+ (void *)(long)init_phys_addr, 1);
+ }
+
+ /*
+ * Unmap the identity mapping
+ */
+ hyp_idmap_teardown();
+
+ /*
+ * Map the Hyp-code called directly from the host
+ */
+ err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
+ if (err) {
+ kvm_err("Cannot map world-switch code\n");
+ goto out_free_mappings;
+ }
+
+ /*
+ * Map the Hyp stack pages
+ */
+ for_each_possible_cpu(cpu) {
+ char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
+ err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE);
+
+ if (err) {
+ kvm_err("Cannot map hyp stack\n");
+ goto out_free_mappings;
+ }
+ }
+
+ /*
+ * Set the HVBAR to the virtual kernel address
+ */
+ for_each_online_cpu(cpu)
+ smp_call_function_single(cpu, cpu_set_vector,
+ __kvm_hyp_vector, 1);
+
+ return 0;
+out_free_mappings:
+ free_hyp_pmds();
+out_free_stack_pages:
+ for_each_possible_cpu(cpu)
+ free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+ return err;
+}
+
+/**
+ * kvm_arch_init - initialize Hyp-mode and memory mappings on all CPUs
+ */
+int kvm_arch_init(void *opaque)
+{
+ int err;
+
+ if (kvm_target_cpu() < 0) {
+ kvm_err("Target CPU not supported!\n");
+ return -ENODEV;
+ }
+
+ err = init_hyp_mode();
+ if (err)
+ goto out_err;
+
+ return 0;
+out_err:
+ return err;
+}
+
+static void cpu_exit_hyp_mode(void *vector)
+{
+ cpu_set_vector(vector);
+
+ /*
+ * Disable the Hyp-MMU on this CPU
+ */
+ asm volatile ("hvc #0");
+}
+
+static int exit_hyp_mode(void)
+{
+ phys_addr_t exit_phys_addr;
+ int cpu;
+
+ /*
+ * TODO: flush Hyp TLB in case idmap code overlaps.
+ * Note that we should do this in the monitor code when switching the
+ * HVBAR, but this is going away and should be rather done in the Hyp
+ * mode change of HVBAR.
+ */
+ hyp_idmap_setup();
+ exit_phys_addr = virt_to_phys(__kvm_hyp_exit);
+ BUG_ON(exit_phys_addr & 0x1f);
+
+ /*
+ * Execute the exit code on each CPU.
+ *
+ * Note: The stack is not mapped yet, so don't do anything else than
+ * tearing down the hypervisor mode on each CPU using a local stack
+ * space for temporary storage.
+ */
+ for_each_online_cpu(cpu) {
+ smp_call_function_single(cpu, cpu_exit_hyp_mode,
+ (void *)(long)exit_phys_addr, 1);
+ }
+
+ return 0;
+}
+
+void kvm_arch_exit(void)
+{
+ int cpu;
+
+ exit_hyp_mode();
+
+ free_hyp_pmds();
+ for_each_possible_cpu(cpu)
+ free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+}
+
+static int arm_init(void)
+{
+ int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ return rc;
+}
+
+static void __exit arm_exit(void)
+{
+ kvm_exit();
+}
+
+module_init(arm_init);
+module_exit(arm_exit);
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
new file mode 100644
index 0000000000000..564add20f2e02
--- /dev/null
+++ b/arch/arm/kvm/emulate.c
@@ -0,0 +1,873 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/mm.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_emulate.h>
+#include <trace/events/kvm.h>
+
+#include "trace.h"
+
+#define REG_OFFSET(_reg) \
+ (offsetof(struct kvm_vcpu_regs, _reg) / sizeof(u32))
+
+#define USR_REG_OFFSET(_num) REG_OFFSET(usr_regs[_num])
+
+static const unsigned long vcpu_reg_offsets[MODE_SYS + 1][16] = {
+ /* FIQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7),
+ REG_OFFSET(fiq_regs[0]), /* r8 */
+ REG_OFFSET(fiq_regs[1]), /* r9 */
+ REG_OFFSET(fiq_regs[2]), /* r10 */
+ REG_OFFSET(fiq_regs[3]), /* r11 */
+ REG_OFFSET(fiq_regs[4]), /* r12 */
+ REG_OFFSET(fiq_regs[5]), /* r13 */
+ REG_OFFSET(fiq_regs[6]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* IRQ Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(irq_regs[0]), /* r13 */
+ REG_OFFSET(irq_regs[1]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* SVC Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(svc_regs[0]), /* r13 */
+ REG_OFFSET(svc_regs[1]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* ABT Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(abt_regs[0]), /* r13 */
+ REG_OFFSET(abt_regs[1]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* UND Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(und_regs[0]), /* r13 */
+ REG_OFFSET(und_regs[1]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* USR Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(usr_regs[13]), /* r13 */
+ REG_OFFSET(usr_regs[14]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+
+ /* SYS Registers */
+ {
+ USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2),
+ USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5),
+ USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8),
+ USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11),
+ USR_REG_OFFSET(12),
+ REG_OFFSET(usr_regs[13]), /* r13 */
+ REG_OFFSET(usr_regs[14]), /* r14 */
+ REG_OFFSET(pc) /* r15 */
+ },
+};
+
+/*
+ * Return a pointer to the register identified by @reg_num, as banked in
+ * the specified mode of the virtual CPU.
+ */
+u32 *vcpu_reg_mode(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
+{
+ u32 *reg_array = (u32 *)&vcpu->arch.regs;
+
+ BUG_ON(reg_num > 15);
+ BUG_ON(mode > MODE_SYS);
+
+ return reg_array + vcpu_reg_offsets[mode][reg_num];
+}
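The table flattens ARM's register banking into word offsets inside struct kvm_vcpu_regs, so callers address any banked register uniformly. A usage sketch:

    /* Banked SP (r13) of IRQ mode resolves to irq_regs[0] via the table */
    u32 irq_sp = *vcpu_reg_mode(vcpu, 13, MODE_IRQ);

    /* r0 is unbanked, so SVC mode resolves it to usr_regs[0] */
    *vcpu_reg_mode(vcpu, 0, MODE_SVC) = 0;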
+
+/******************************************************************************
+ * Utility functions common for all emulation code
+ *****************************************************************************/
+
+/*
+ * This one accepts a table of {pattern, mask} pairs: an instruction
+ * matches entry i when (instr & mask) == (pattern & mask).
+ */
+#define INSTR_NONE -1
+static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries)
+{
+ int i;
+ u32 mask;
+
+ for (i = 0; i < table_entries; i++) {
+ mask = table[i][1];
+ if ((table[i][0] & mask) == (instr & mask))
+ return i;
+ }
+ return INSTR_NONE;
+}
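Worked against the ls_instr table defined further down, for example:

    u32 instr = 0xe5912000;  /* ldr r2, [r1] */
    int idx = kvm_instr_index(instr, ls_instr, NUM_LS_INSTR);
    /* instr & 0x0c500000 == 0x04100000, the LDR pattern,
     * so idx == INSTR_LS_LDR */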
+
+/******************************************************************************
+ * Co-processor emulation
+ *****************************************************************************/
+
+struct coproc_params {
+ unsigned long CRn;
+ unsigned long CRm;
+ unsigned long Op1;
+ unsigned long Op2;
+ unsigned long Rt1;
+ unsigned long Rt2;
+ bool is_64bit;
+ bool is_write;
+};
+
+static void print_cp_instr(const struct coproc_params *p)
+{
+ /* Look, we even formatted it for you to paste into the table! */
+ if (p->is_64bit) {
+ kvm_err("{ CRn(DF), CRm(%2lu), Op1(%2lu), Op2(DF), is64, %-6s"
+ " func, arg},\n",
+ p->CRm, p->Op1, p->is_write ? "WRITE," : "READ,");
+ } else {
+ kvm_err("{ CRn(%2lu), CRm(%2lu), Op1(%2lu), Op2(%2lu), is32,"
+ " %-6s func, arg},\n",
+ p->CRn, p->CRm, p->Op1, p->Op2,
+ p->is_write ? "WRITE," : "READ,");
+ }
+}
+
+int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
+
+int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
+
+int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
+
+int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
+
+static bool ignore_write(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ if (arg)
+ trace_kvm_emulate_cp15_imp(p->Op1, p->Rt1, p->CRn, p->CRm,
+ p->Op2, p->is_write);
+ return true;
+}
+
+static bool read_zero(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ if (arg)
+ trace_kvm_emulate_cp15_imp(p->Op1, p->Rt1, p->CRn, p->CRm,
+ p->Op2, p->is_write);
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+}
+
+static bool read_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ u32 l2ctlr, ncores;
+
+ switch (kvm_target_cpu()) {
+ case CORTEX_A15:
+ asm volatile("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+ l2ctlr &= ~(3 << 24);
+ ncores = atomic_read(&vcpu->kvm->online_vcpus) - 1;
+ l2ctlr |= (ncores & 3) << 24;
+ *vcpu_reg(vcpu, p->Rt1) = l2ctlr;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool write_l2ctlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ return false;
+}
+
+static bool access_l2ectlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ switch (kvm_target_cpu()) {
+ case CORTEX_A15:
+ if (!p->is_write)
+ *vcpu_reg(vcpu, p->Rt1) = 0;
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool read_actlr(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long arg)
+{
+ u32 actlr;
+
+ switch (kvm_target_cpu()) {
+ case CORTEX_A15:
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ /* Make the SMP bit consistent with the guest configuration */
+ if (atomic_read(&vcpu->kvm->online_vcpus) > 1)
+ actlr |= 1U << 6;
+ else
+ actlr &= ~(1U << 6);
+ *vcpu_reg(vcpu, p->Rt1) = actlr;
+ break;
+ default:
+ asm volatile("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+ *vcpu_reg(vcpu, p->Rt1) = actlr;
+ break;
+ }
+
+ return true;
+}
+
+static bool write_dcsw(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long cp15_reg)
+{
+ u32 val;
+
+ val = *vcpu_reg(vcpu, p->Rt1);
+
+ switch (p->CRm) {
+ case 6: /* Upgrade DCISW to DCCISW, as per HCR.SWIO */
+ case 14: /* DCCISW */
+ asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (val));
+ break;
+
+ case 10: /* DCCSW */
+ asm volatile("mcr p15, 0, %0, c7, c10, 2" : : "r" (val));
+ break;
+ }
+
+ cpumask_setall(&vcpu->arch.require_dcache_flush);
+ cpumask_clear_cpu(vcpu->cpu, &vcpu->arch.require_dcache_flush);
+
+ return true;
+}
+
+static bool access_cp15_reg(struct kvm_vcpu *vcpu,
+ const struct coproc_params *p,
+ unsigned long cp15_reg)
+{
+ if (p->is_write)
+ vcpu->arch.cp15[cp15_reg] = *vcpu_reg(vcpu, p->Rt1);
+ else
+ *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.cp15[cp15_reg];
+ return true;
+}
+
+/* Any field which is 0xFFFFFFFF == DF */
+struct coproc_emulate {
+ unsigned long CRn;
+ unsigned long CRm;
+ unsigned long Op1;
+ unsigned long Op2;
+
+ unsigned long is_64;
+ unsigned long is_w;
+
+ bool (*f)(struct kvm_vcpu *,
+ const struct coproc_params *,
+ unsigned long);
+ unsigned long arg;
+};
+
+#define DF (-1UL) /* Default: If nothing else fits, use this one */
+#define CRn(_x) .CRn = _x
+#define CRm(_x) .CRm = _x
+#define Op1(_x) .Op1 = _x
+#define Op2(_x) .Op2 = _x
+#define is64 .is_64 = true
+#define is32 .is_64 = false
+#define READ .is_w = false
+#define WRITE .is_w = true
+#define RW .is_w = DF
+
+static const struct coproc_emulate coproc_emulate[] = {
+ /*
+ * ACTLR access:
+ *
+ * Ignore writes, and read returns the host settings.
+ */
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, WRITE, ignore_write},
+ { CRn( 1), CRm( 0), Op1( 0), Op2( 1), is32, READ, read_actlr},
+ /*
+ * DC{C,I,CI}SW operations:
+ */
+ { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, WRITE, write_dcsw},
+ { CRn( 7), CRm( 6), Op1( 0), Op2( 2), is32, READ, read_zero},
+ { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, WRITE, write_dcsw},
+ { CRn( 7), CRm(10), Op1( 0), Op2( 2), is32, READ, read_zero},
+ { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, WRITE, write_dcsw},
+ { CRn( 7), CRm(14), Op1( 0), Op2( 2), is32, READ, read_zero},
+ /*
+ * L2CTLR access (guest wants to know #CPUs).
+ *
+ * FIXME: Hack Alert: Read zero as default case.
+ */
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, READ, read_l2ctlr},
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 2), is32, WRITE, write_l2ctlr},
+ { CRn( 9), CRm( 0), Op1( 1), Op2( 3), is32, READ, access_l2ectlr},
+ { CRn( 9), CRm(DF), Op1(DF), Op2(DF), is32, WRITE, ignore_write},
+ { CRn( 9), CRm(DF), Op1(DF), Op2(DF), is32, READ, read_zero},
+
+ /*
+ * These CRn == 10 entries may not need to exist - if we can
+ * ignore guest attempts to tamper with TLB lockdowns then it
+ * should be enough to store/restore the host/guest PRRR and
+ * NMRR memory remap registers and allow guest direct access
+ * to these registers.
+ *
+ * TLB Lockdown operations - ignored
+ */
+ { CRn(10), CRm( 0), Op1(DF), Op2(DF), is32, WRITE, ignore_write},
+ { CRn(10), CRm( 2), Op1( 0), Op2( 0), is32, RW, access_cp15_reg,
+ c10_PRRR},
+ { CRn(10), CRm( 2), Op1( 0), Op2( 1), is32, RW, access_cp15_reg,
+ c10_NMRR},
+
+ /*
+ * The CP15 c15 register is architecturally implementation
+ * defined, but some guest kernels attempt to read/write a
+ * diagnostics register here. We always return 0 and ignore
+ * writes and hope for the best.
+ */
+ { CRn(15), CRm(DF), Op1(DF), Op2(DF), is32, WRITE, ignore_write, 1},
+ { CRn(15), CRm(DF), Op1(DF), Op2(DF), is32, READ, read_zero, 1},
+};
+
+#undef is64
+#undef is32
+#undef READ
+#undef WRITE
+#undef RW
+
+static inline bool match(unsigned long val, unsigned long param)
+{
+ return param == DF || val == param;
+}
+
+static int emulate_cp15(struct kvm_vcpu *vcpu,
+ const struct coproc_params *params)
+{
+ unsigned long instr_len, i;
+
+ for (i = 0; i < ARRAY_SIZE(coproc_emulate); i++) {
+ const struct coproc_emulate *e = &coproc_emulate[i];
+
+ if (!match(params->is_64bit, e->is_64))
+ continue;
+ if (!match(params->is_write, e->is_w))
+ continue;
+ if (!match(params->CRn, e->CRn))
+ continue;
+ if (!match(params->CRm, e->CRm))
+ continue;
+ if (!match(params->Op1, e->Op1))
+ continue;
+ if (!match(params->Op2, e->Op2))
+ continue;
+
+ /* If function fails, it should complain. */
+ if (!e->f(vcpu, params, e->arg))
+ goto undef;
+
+ /* Skip instruction, since it was emulated */
+ instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+ *vcpu_pc(vcpu) += instr_len;
+ kvm_adjust_itstate(vcpu);
+ return 0;
+ }
+
+ kvm_err("Unsupported guest CP15 access at: %08x\n",
+ vcpu->arch.regs.pc);
+ print_cp_instr(params);
+undef:
+ kvm_inject_undefined(vcpu);
+ return 0;
+}
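Since DF compares equal to anything, table order matters: exact rows must precede their wildcard catch-alls. For instance, a guest write to CRn=9/CRm=1 skips the exact c9 L2CTLR rows (CRm differs) and lands on the { CRn( 9), CRm(DF), Op1(DF), Op2(DF), is32, WRITE, ignore_write } entry. In miniature:

    match(9, 9);   /* true:  exact value      */
    match(1, DF);  /* true:  DF is a wildcard */
    match(2, 0);   /* false: row is skipped   */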
+
+/**
+ * kvm_handle_cp15_64 - handles a mrrc/mcrr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct coproc_params params;
+
+ params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+ params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+ params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.is_64bit = true;
+
+ params.Op1 = (vcpu->arch.hsr >> 16) & 0xf;
+ params.Op2 = 0;
+ params.Rt2 = (vcpu->arch.hsr >> 10) & 0xf;
+ params.CRn = 0;
+
+ return emulate_cp15(vcpu, &params);
+}
+
+/**
+ * kvm_handle_cp15_32 - handles a mrc/mcr trap on a guest CP15 access
+ * @vcpu: The VCPU pointer
+ * @run: The kvm_run struct
+ */
+int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ struct coproc_params params;
+
+ params.CRm = (vcpu->arch.hsr >> 1) & 0xf;
+ params.Rt1 = (vcpu->arch.hsr >> 5) & 0xf;
+ params.is_write = ((vcpu->arch.hsr & 1) == 0);
+ params.is_64bit = false;
+
+ params.CRn = (vcpu->arch.hsr >> 10) & 0xf;
+ params.Op1 = (vcpu->arch.hsr >> 14) & 0x7;
+ params.Op2 = (vcpu->arch.hsr >> 17) & 0x7;
+ params.Rt2 = 0;
+
+ return emulate_cp15(vcpu, &params);
+}
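As a worked example of the extraction above, a trapped "mrc p15, 0, r2, c1, c0, 0" (an SCTLR read) carries ISS bits in the HSR that decode as:

    /* Op2 = (hsr >> 17) & 0x7  -> 0
     * Op1 = (hsr >> 14) & 0x7  -> 0
     * CRn = (hsr >> 10) & 0xf  -> 1    (c1)
     * Rt1 = (hsr >>  5) & 0xf  -> 2    (r2)
     * CRm = (hsr >>  1) & 0xf  -> 0    (c0)
     * bit 0 == 1 for mrc (a read), hence is_write == false
     */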
+
+/**
+ * kvm_handle_wfi - handle a wait-for-interrupts instruction executed by a guest
+ * @vcpu: the vcpu pointer
+ * @run: the kvm_run structure pointer
+ *
+ * Simply sets the wait_for_interrupts flag on the vcpu structure, which will
+ * halt execution of world-switches and schedule other host processes until
+ * there is an incoming IRQ or FIQ to the VM.
+ */
+int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ trace_kvm_wfi(vcpu->arch.regs.pc);
+ vcpu->stat.wfi_exits++;
+ if (!vcpu->arch.irq_lines)
+ vcpu->arch.wait_for_interrupts = 1;
+ return 0;
+}
+
+
+/******************************************************************************
+ * Load-Store instruction emulation
+ *****************************************************************************/
+
+/*
+ * Must be ordered with LOADS first and STORES afterwards
+ * for easy distinction when doing MMIO.
+ */
+#define NUM_LD_INSTR 9
+enum INSTR_LS_INDEXES {
+ INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB,
+ INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB,
+ INSTR_LS_LDRSH,
+ INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB,
+ INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH,
+ NUM_LS_INSTR
+};
+
+static u32 ls_instr[NUM_LS_INSTR][2] = {
+ {0x04700000, 0x0d700000}, /* LDRBT */
+ {0x04300000, 0x0d700000}, /* LDRT */
+ {0x04100000, 0x0c500000}, /* LDR */
+ {0x04500000, 0x0c500000}, /* LDRB */
+ {0x000000d0, 0x0e1000f0}, /* LDRD */
+ {0x01900090, 0x0ff000f0}, /* LDREX */
+ {0x001000b0, 0x0e1000f0}, /* LDRH */
+ {0x001000d0, 0x0e1000f0}, /* LDRSB */
+ {0x001000f0, 0x0e1000f0}, /* LDRSH */
+ {0x04600000, 0x0d700000}, /* STRBT */
+ {0x04200000, 0x0d700000}, /* STRT */
+ {0x04000000, 0x0c500000}, /* STR */
+ {0x04400000, 0x0c500000}, /* STRB */
+ {0x000000f0, 0x0e1000f0}, /* STRD */
+ {0x01800090, 0x0ff000f0}, /* STREX */
+ {0x000000b0, 0x0e1000f0} /* STRH */
+};
+
+static inline int get_arm_ls_instr_index(u32 instr)
+{
+ return kvm_instr_index(instr, ls_instr, NUM_LS_INSTR);
+}
+
+/*
+ * Load-Store instruction decoding
+ */
+#define INSTR_LS_TYPE_BIT 26
+#define INSTR_LS_RD_MASK 0x0000f000
+#define INSTR_LS_RD_SHIFT 12
+#define INSTR_LS_RN_MASK 0x000f0000
+#define INSTR_LS_RN_SHIFT 16
+#define INSTR_LS_RM_MASK 0x0000000f
+#define INSTR_LS_OFFSET12_MASK 0x00000fff
+
+#define INSTR_LS_BIT_P 24
+#define INSTR_LS_BIT_U 23
+#define INSTR_LS_BIT_B 22
+#define INSTR_LS_BIT_W 21
+#define INSTR_LS_BIT_L 20
+#define INSTR_LS_BIT_S 6
+#define INSTR_LS_BIT_H 5
+
+/*
+ * ARM addressing mode defines
+ */
+#define OFFSET_IMM_MASK 0x0e000000
+#define OFFSET_IMM_VALUE 0x04000000
+#define OFFSET_REG_MASK 0x0e000ff0
+#define OFFSET_REG_VALUE 0x06000000
+#define OFFSET_SCALE_MASK 0x0e000010
+#define OFFSET_SCALE_VALUE 0x06000000
+
+#define SCALE_SHIFT_MASK 0x00000060
+#define SCALE_SHIFT_SHIFT 5
+#define SCALE_SHIFT_LSL 0x0
+#define SCALE_SHIFT_LSR 0x1
+#define SCALE_SHIFT_ASR 0x2
+#define SCALE_SHIFT_ROR_RRX 0x3
+#define SCALE_SHIFT_IMM_MASK 0x00000f80
+#define SCALE_SHIFT_IMM_SHIFT 7
+
+#define PSR_BIT_C 29
+
+static unsigned long ls_word_calc_offset(struct kvm_vcpu *vcpu,
+ unsigned long instr)
+{
+ int offset = 0;
+
+ if ((instr & OFFSET_IMM_MASK) == OFFSET_IMM_VALUE) {
+ /* Immediate offset/index */
+ offset = instr & INSTR_LS_OFFSET12_MASK;
+
+ if (!(instr & (1U << INSTR_LS_BIT_U)))
+ offset = -offset;
+ }
+
+ if ((instr & OFFSET_REG_MASK) == OFFSET_REG_VALUE) {
+ /* Register offset/index */
+ u8 rm = instr & INSTR_LS_RM_MASK;
+ offset = *vcpu_reg(vcpu, rm);
+
+ if (!(instr & (1U << INSTR_LS_BIT_P)))
+ offset = 0;
+ }
+
+ if ((instr & OFFSET_SCALE_MASK) == OFFSET_SCALE_VALUE) {
+ /* Scaled register offset */
+ u8 rm = instr & INSTR_LS_RM_MASK;
+ u8 shift = (instr & SCALE_SHIFT_MASK) >> SCALE_SHIFT_SHIFT;
+ u32 shift_imm = (instr & SCALE_SHIFT_IMM_MASK)
+ >> SCALE_SHIFT_IMM_SHIFT;
+ offset = *vcpu_reg(vcpu, rm);
+
+ switch (shift) {
+ case SCALE_SHIFT_LSL:
+ offset = offset << shift_imm;
+ break;
+ case SCALE_SHIFT_LSR:
+ if (shift_imm == 0)
+ offset = 0;
+ else
+ offset = ((u32)offset) >> shift_imm;
+ break;
+ case SCALE_SHIFT_ASR:
+ if (shift_imm == 0) {
+ if (offset & (1U << 31))
+ offset = 0xffffffff;
+ else
+ offset = 0;
+ } else {
+ /* Ensure arithmetic shift */
+ asm("mov %[r], %[op], ASR %[s]" :
+ [r] "=r" (offset) :
+ [op] "r" (offset), [s] "r" (shift_imm));
+ }
+ break;
+ case SCALE_SHIFT_ROR_RRX:
+ if (shift_imm == 0) {
+ /* RRX: the carry flag becomes bit 31 */
+ u32 C = (vcpu->arch.regs.cpsr >>
+ PSR_BIT_C) & 1;
+ offset = (C << 31) | ((u32)offset >> 1);
+ } else {
+ /* Rotate right */
+ asm("mov %[r], %[op], ROR %[s]" :
+ [r] "=r" (offset) :
+ [op] "r" (offset), [s] "r" (shift_imm));
+ }
+ break;
+ }
+ }
+
+ if (instr & (1U << INSTR_LS_BIT_U))
+ return offset;
+ else
+ return -offset;
+}
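A concrete trace of the scaled-register path above, for "ldr r0, [r1, r2, lsl #2]" with guest r2 == 3:

    /* rm        = instr & INSTR_LS_RM_MASK       -> 2
     * shift     = SCALE_SHIFT_LSL (instr[6:5]), shift_imm = 2 (instr[11:7])
     * offset    = *vcpu_reg(vcpu, 2) << 2        -> 12
     * U bit set -> ls_word_calc_offset() returns +12, which the caller
     *              applies to the base only when writeback is requested
     */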
+
+static int kvm_ls_length(struct kvm_vcpu *vcpu, u32 instr)
+{
+ if (instr & (1U << INSTR_LS_TYPE_BIT)) {
+ /* LS word or unsigned byte */
+ if (instr & (1U << INSTR_LS_BIT_B))
+ return sizeof(unsigned char);
+ else
+ return sizeof(u32);
+ } else {
+ /* LS halfword, doubleword or signed byte */
+ u32 H = (instr & (1U << INSTR_LS_BIT_H));
+ u32 S = (instr & (1U << INSTR_LS_BIT_S));
+ u32 L = (instr & (1U << INSTR_LS_BIT_L));
+
+ if (!L && S) {
+ kvm_err("WARNING: d-word for MMIO\n");
+ return 2 * sizeof(u32);
+ } else if (L && S && !H)
+ return sizeof(char);
+ else
+ return sizeof(u16);
+ }
+}
+
+/**
+ * kvm_emulate_mmio_ls - emulates load/store instructions made to I/O memory
+ * @vcpu: The vcpu pointer
+ * @fault_ipa: The IPA that caused the 2nd stage fault
+ * @instr: The instruction that caused the fault
+ *
+ * Handles emulation of load/store instructions which cannot be emulated through
+ * information found in the HSR on faults. It is necessary in this case to
+ * simply decode the offending instruction in software and determine the
+ * required operands.
+ */
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ unsigned long instr)
+{
+ unsigned long rd, rn, offset, len, instr_len;
+ int index;
+ bool is_write, is_thumb;
+
+ trace_kvm_mmio_emulate(vcpu->arch.regs.pc, instr, vcpu->arch.regs.cpsr);
+
+ index = get_arm_ls_instr_index(instr);
+ if (index == INSTR_NONE) {
+ kvm_err("Unknown load/store instruction\n");
+ return -EINVAL;
+ }
+
+ is_write = (index >= NUM_LD_INSTR);
+ rd = (instr & INSTR_LS_RD_MASK) >> INSTR_LS_RD_SHIFT;
+ len = kvm_ls_length(vcpu, instr);
+
+ vcpu->run->mmio.is_write = is_write;
+ vcpu->run->mmio.phys_addr = fault_ipa;
+ vcpu->run->mmio.len = len;
+ vcpu->arch.mmio_sign_extend = false;
+ vcpu->arch.mmio_rd = rd;
+
+ trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+ KVM_TRACE_MMIO_READ_UNSATISFIED,
+ len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+ /* Handle base register writeback */
+ if (!(instr & (1U << INSTR_LS_BIT_P)) ||
+ (instr & (1U << INSTR_LS_BIT_W))) {
+ rn = (instr & INSTR_LS_RN_MASK) >> INSTR_LS_RN_SHIFT;
+ offset = ls_word_calc_offset(vcpu, instr);
+ *vcpu_reg(vcpu, rn) += offset;
+ }
+
+ /*
+ * The MMIO instruction is emulated and should not be re-executed
+ * in the guest.
+ */
+ is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_T_BIT);
+ if (is_thumb && !is_wide_instruction(instr))
+ instr_len = 2;
+ else
+ instr_len = 4;
+
+ *vcpu_pc(vcpu) += instr_len;
+ kvm_adjust_itstate(vcpu);
+ return KVM_EXIT_MMIO;
+}
+
+/**
+ * kvm_adjust_itstate - adjust ITSTATE when emulating instructions in an IT-block
+ * @vcpu: The VCPU pointer
+ *
+ * When exceptions occur while instructions are executed in Thumb IF-THEN
+ * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have
+ * to do this little bit of work manually. The fields map like this:
+ *
+ * IT[7:0] -> CPSR[26:25],CPSR[15:10]
+ */
+void kvm_adjust_itstate(struct kvm_vcpu *vcpu)
+{
+ unsigned long itbits, cond;
+ unsigned long cpsr = *vcpu_cpsr(vcpu);
+ bool is_arm = !(cpsr & PSR_T_BIT);
+
+ BUG_ON(is_arm && (cpsr & PSR_IT_MASK));
+
+ if (!(cpsr & PSR_IT_MASK))
+ return;
+
+ cond = (cpsr & 0xe000) >> 13;
+ itbits = (cpsr & 0x1c00) >> (10 - 2);
+ itbits |= (cpsr & (0x3 << 25)) >> 25;
+
+ /* Perform ITAdvance (see page A-52 in ARM DDI 0406C) */
+ if ((itbits & 0x7) == 0)
+ itbits = cond = 0;
+ else
+ itbits = (itbits << 1) & 0x1f;
+
+ cpsr &= ~PSR_IT_MASK;
+ cpsr |= cond << 13;
+ cpsr |= (itbits & 0x1c) << (10 - 2);
+ cpsr |= (itbits & 0x3) << 25;
+ *vcpu_cpsr(vcpu) = cpsr;
+}
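A worked instance of the ITAdvance above, for ITSTATE = 0xa6 (IT[7:5] = 0b101, IT[4:0] = 0b00110):

    /* IT[2:0] != 0, so itbits = (0b00110 << 1) & 0x1f = 0b01100 and the
     * new ITSTATE is 0b10101100 = 0xac; once IT[2:0] reaches zero the
     * whole field collapses to 0 and the IT block is finished.
     */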
+
+/******************************************************************************
+ * Inject exceptions into the guest
+ */
+
+static u32 exc_vector_base(struct kvm_vcpu *vcpu)
+{
+ u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+ u32 vbar = vcpu->arch.cp15[c12_VBAR];
+
+ if (sctlr & SCTLR_V)
+ return 0xffff0000;
+ else /* always have security exceptions */
+ return vbar;
+}
+
+/**
+ * kvm_inject_undefined - inject an undefined exception into the guest
+ * @vcpu: The VCPU to receive the undefined exception
+ *
+ * It is assumed that this code is called from the VCPU thread and that the
+ * VCPU therefore is not currently executing guest code.
+ *
+ * Modelled after TakeUndefInstrException() pseudocode.
+ */
+void kvm_inject_undefined(struct kvm_vcpu *vcpu)
+{
+ u32 new_lr_value;
+ u32 new_spsr_value;
+ u32 cpsr = *vcpu_cpsr(vcpu);
+ u32 sctlr = vcpu->arch.cp15[c1_SCTLR];
+ bool is_thumb = (cpsr & PSR_T_BIT);
+ u32 vect_offset = 4;
+ u32 return_offset = (is_thumb) ? 2 : 4;
+
+ new_spsr_value = cpsr;
+ new_lr_value = *vcpu_pc(vcpu) - return_offset;
+
+ *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE;
+ *vcpu_cpsr(vcpu) |= PSR_I_BIT;
+ *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT);
+
+ if (sctlr & SCTLR_TE)
+ *vcpu_cpsr(vcpu) |= PSR_T_BIT;
+ if (sctlr & SCTLR_EE)
+ *vcpu_cpsr(vcpu) |= PSR_E_BIT;
+
+ /* Note: These now point to UND banked copies */
+ *vcpu_spsr(vcpu) = cpsr;
+ *vcpu_reg(vcpu, 14) = new_lr_value;
+
+ /* Branch to exception vector */
+ *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
+}
diff --git a/arch/arm/kvm/exports.c b/arch/arm/kvm/exports.c
new file mode 100644
index 0000000000000..f39f82380a981
--- /dev/null
+++ b/arch/arm/kvm/exports.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <asm/kvm_asm.h>
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_init);
+EXPORT_SYMBOL_GPL(__kvm_hyp_init_end);
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_exit);
+EXPORT_SYMBOL_GPL(__kvm_hyp_exit_end);
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_vector);
+
+EXPORT_SYMBOL_GPL(__kvm_hyp_code_start);
+EXPORT_SYMBOL_GPL(__kvm_hyp_code_end);
+
+EXPORT_SYMBOL_GPL(__kvm_vcpu_run);
+
+EXPORT_SYMBOL_GPL(__kvm_flush_vm_context);
+EXPORT_SYMBOL_GPL(__kvm_tlb_flush_vmid);
+
+EXPORT_SYMBOL_GPL(smp_send_reschedule);
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
new file mode 100644
index 0000000000000..57c0e296da551
--- /dev/null
+++ b/arch/arm/kvm/guest.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+
+#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM }
+#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+ /* vcpu stats */
+ VCPU_STAT(exits),
+ VCPU_STAT(mmio_exits),
+ VCPU_STAT(kmmio_exits),
+ VCPU_STAT(wfi_exits),
+ VCPU_STAT(irq_exits),
+ VCPU_STAT(halt_wakeup),
+
+ /* vm stats */
+ VM_STAT(remote_tlb_flush),
+ { NULL }
+};
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ /*
+ * GPRs and PSRs
+ */
+ memcpy(regs->regs0_7, &(vcpu_regs->usr_regs[0]), sizeof(u32) * 8);
+ memcpy(regs->usr_regs8_12, &(vcpu_regs->usr_regs[8]), sizeof(u32) * 5);
+ memcpy(regs->fiq_regs8_12, &(vcpu_regs->fiq_regs[0]), sizeof(u32) * 5);
+ regs->reg13[MODE_FIQ] = vcpu_regs->fiq_regs[5];
+ regs->reg14[MODE_FIQ] = vcpu_regs->fiq_regs[6];
+ regs->reg13[MODE_IRQ] = vcpu_regs->irq_regs[0];
+ regs->reg14[MODE_IRQ] = vcpu_regs->irq_regs[1];
+ regs->reg13[MODE_SVC] = vcpu_regs->svc_regs[0];
+ regs->reg14[MODE_SVC] = vcpu_regs->svc_regs[1];
+ regs->reg13[MODE_ABT] = vcpu_regs->abt_regs[0];
+ regs->reg14[MODE_ABT] = vcpu_regs->abt_regs[1];
+ regs->reg13[MODE_UND] = vcpu_regs->und_regs[0];
+ regs->reg14[MODE_UND] = vcpu_regs->und_regs[1];
+ regs->reg13[MODE_USR] = vcpu_regs->usr_regs[13];
+ regs->reg14[MODE_USR] = vcpu_regs->usr_regs[14];
+
+ regs->spsr[MODE_FIQ] = vcpu_regs->fiq_regs[7];
+ regs->spsr[MODE_IRQ] = vcpu_regs->irq_regs[2];
+ regs->spsr[MODE_SVC] = vcpu_regs->svc_regs[2];
+ regs->spsr[MODE_ABT] = vcpu_regs->abt_regs[2];
+ regs->spsr[MODE_UND] = vcpu_regs->und_regs[2];
+
+ regs->reg15 = vcpu_regs->pc;
+ regs->cpsr = vcpu_regs->cpsr;
+
+
+ /*
+ * Co-processor registers.
+ */
+ regs->cp15.c0_midr = vcpu->arch.cp15[c0_MIDR];
+ regs->cp15.c1_sys = vcpu->arch.cp15[c1_SCTLR];
+ regs->cp15.c2_base0 = vcpu->arch.cp15[c2_TTBR0];
+ regs->cp15.c2_base1 = vcpu->arch.cp15[c2_TTBR1];
+ regs->cp15.c2_control = vcpu->arch.cp15[c2_TTBCR];
+ regs->cp15.c3_dacr = vcpu->arch.cp15[c3_DACR];
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ struct kvm_vcpu_regs *vcpu_regs = &vcpu->arch.regs;
+
+ if (__vcpu_mode(regs->cpsr) == 0xf)
+ return -EINVAL;
+
+ memcpy(&(vcpu_regs->usr_regs[0]), regs->regs0_7, sizeof(u32) * 8);
+ memcpy(&(vcpu_regs->usr_regs[8]), regs->usr_regs8_12, sizeof(u32) * 5);
+ memcpy(&(vcpu_regs->fiq_regs[0]), regs->fiq_regs8_12, sizeof(u32) * 5);
+
+ vcpu_regs->fiq_regs[5] = regs->reg13[MODE_FIQ];
+ vcpu_regs->fiq_regs[6] = regs->reg14[MODE_FIQ];
+ vcpu_regs->irq_regs[0] = regs->reg13[MODE_IRQ];
+ vcpu_regs->irq_regs[1] = regs->reg14[MODE_IRQ];
+ vcpu_regs->svc_regs[0] = regs->reg13[MODE_SVC];
+ vcpu_regs->svc_regs[1] = regs->reg14[MODE_SVC];
+ vcpu_regs->abt_regs[0] = regs->reg13[MODE_ABT];
+ vcpu_regs->abt_regs[1] = regs->reg14[MODE_ABT];
+ vcpu_regs->und_regs[0] = regs->reg13[MODE_UND];
+ vcpu_regs->und_regs[1] = regs->reg14[MODE_UND];
+ vcpu_regs->usr_regs[13] = regs->reg13[MODE_USR];
+ vcpu_regs->usr_regs[14] = regs->reg14[MODE_USR];
+
+ vcpu_regs->fiq_regs[7] = regs->spsr[MODE_FIQ];
+ vcpu_regs->irq_regs[2] = regs->spsr[MODE_IRQ];
+ vcpu_regs->svc_regs[2] = regs->spsr[MODE_SVC];
+ vcpu_regs->abt_regs[2] = regs->spsr[MODE_ABT];
+ vcpu_regs->und_regs[2] = regs->spsr[MODE_UND];
+
+ /*
+ * Co-processor registers.
+ */
+ vcpu->arch.cp15[c0_MIDR] = regs->cp15.c0_midr;
+ vcpu->arch.cp15[c1_SCTLR] = regs->cp15.c1_sys;
+
+ vcpu_regs->pc = regs->reg15;
+ vcpu_regs->cpsr = regs->cpsr;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S
new file mode 100644
index 0000000000000..4db26cb536c2d
--- /dev/null
+++ b/arch/arm/kvm/init.S
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unified.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor initialization
+@ - should be called with:
+@ r0 = Hypervisor pgd pointer
+@ r1 = top of Hyp stack (kernel VA)
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ .text
+ .arm
+ .pushsection .hyp.idmap.text,"ax"
+ .align 12
+__kvm_hyp_init:
+ .globl __kvm_hyp_init
+
+ @ Hyp-mode exception vector
+ nop
+ nop
+ nop
+ nop
+ nop
+ b __do_hyp_init
+ nop
+ nop
+
+__do_hyp_init:
+ @ Set the sp to end of this page and push data for later use
+ mov sp, pc
+ bic sp, sp, #0x0ff
+ bic sp, sp, #0xf00
+ add sp, sp, #0x1000
+ push {r0, r1, r2, r12}
+
+ @ Set the HTTBR to point to the hypervisor PGD pointer passed to
+ @ function and set the upper bits equal to the kernel PGD.
+ mrrc p15, 1, r1, r2, c2
+ mcrr p15, 4, r0, r2, c2
+
+ @ Set the HTCR and VTCR to the same shareability and cacheability
+ @ settings as the non-secure TTBCR and with T0SZ == 0.
+ mrc p15, 4, r0, c2, c0, 2 @ HTCR
+ ldr r12, =HTCR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c2, c0, 2 @ TTBCR
+ and r1, r1, #(HTCR_MASK & ~TTBCR_T0SZ)
+ orr r0, r0, r1
+ mcr p15, 4, r0, c2, c0, 2 @ HTCR
+
+ mrc p15, 4, r1, c2, c1, 2 @ VTCR
+ bic r1, r1, #(VTCR_HTCR_SH | VTCR_SL0)
+ bic r0, r0, #(~VTCR_HTCR_SH)
+ orr r1, r0, r1
+ orr r1, r1, #(VTCR_SL_L1 | VTCR_GUEST_T0SZ)
+ mcr p15, 4, r1, c2, c1, 2 @ VTCR
+
+ @ Use the same memory attributes for hyp. accesses as the kernel
+ @ (copy MAIRx to HMAIRx).
+ mrc p15, 0, r0, c10, c2, 0
+ mcr p15, 4, r0, c10, c2, 0
+ mrc p15, 0, r0, c10, c2, 1
+ mcr p15, 4, r0, c10, c2, 1
+
+ @ Set the HSCTLR to:
+ @ - ARM/THUMB exceptions: Kernel config (Thumb-2 kernel)
+ @ - Endianness: Kernel config
+ @ - Fast Interrupt Features: Kernel config
+ @ - Write permission implies XN: disabled
+ @ - Instruction cache: enabled
+ @ - Data/Unified cache: enabled
+ @ - Memory alignment checks: enabled
+ @ - MMU: enabled (this code must be run from an identity mapping)
+ mrc p15, 4, r0, c1, c0, 0 @ HSCTLR
+ ldr r12, =HSCTLR_MASK
+ bic r0, r0, r12
+ mrc p15, 0, r1, c1, c0, 0 @ SCTLR
+ ldr r12, =(HSCTLR_EE | HSCTLR_FI)
+ and r1, r1, r12
+ ARM( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_I) )
+ THUMB( ldr r12, =(HSCTLR_M | HSCTLR_A | HSCTLR_I | HSCTLR_TE) )
+ orr r1, r1, r12
+ orr r0, r0, r1
+ isb
+ mcr p15, 4, r0, c1, c0, 0 @ HSCTLR
+ isb
+
+ @ Set stack pointer and return to the kernel
+ pop {r0, r1, r2, r12}
+ mov sp, r1
+ eret
+
+ .ltorg
+
+ .align 12
+
+ __kvm_init_sp:
+ .globl __kvm_hyp_init_end
+__kvm_hyp_init_end:
+
+ .align 12
+__kvm_hyp_exit:
+ .globl __kvm_hyp_exit
+
+ @ Hyp-mode exception vector
+ nop
+ nop
+ nop
+ nop
+ nop
+ b __do_hyp_exit
+ nop
+ nop
+
+__do_hyp_exit:
+ @ Clear the MMU and TE bits in the HSCTLR
+ mrc p15, 4, sp, c1, c0, 0 @ HSCTLR
+ bic sp, sp, #((1 << 30) | (1 << 0))
+
+ isb
+ mcr p15, 4, sp, c1, c0, 0 @ HSCTLR
+ mcr p15, 4, r0, c8, c7, 0 @ Flush Hyp TLB, r0 ignored
+ isb
+ eret
+
+ .globl __kvm_hyp_exit_end
+__kvm_hyp_exit_end:
+
+ .popsection
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
new file mode 100644
index 0000000000000..fd7331c6b4698
--- /dev/null
+++ b/arch/arm/kvm/interrupts.S
@@ -0,0 +1,698 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/unified.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_arm.h>
+
+#define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4))
+#define VCPU_USR_SP (VCPU_USR_REG(13))
+#define VCPU_FIQ_REG(_reg_nr) (VCPU_FIQ_REGS + (_reg_nr * 4))
+#define VCPU_FIQ_SPSR (VCPU_FIQ_REG(7))
+
+ .text
+ .align PAGE_SHIFT
+
+__kvm_hyp_code_start:
+ .globl __kvm_hyp_code_start
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+ *
+ * We rely on the hardware to broadcast the TLB invalidation to all CPUs
+ * inside the inner-shareable domain (which is the case for all v7
+ * implementations). If we come across a non-IS SMP implementation, we'll
+ * have to use an IPI based mechanism. Until then, we stick to the simple
+ * hardware assisted version.
+ */
+ENTRY(__kvm_tlb_flush_vmid)
+ hvc #0 @ Switch to Hyp mode
+ push {r2, r3}
+
+ add r0, r0, #KVM_VTTBR
+ ldrd r2, r3, [r0]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+ isb
+ mcr p15, 0, r0, c8, c3, 0 @ TLBIALLIS (rt ignored)
+ dsb
+ isb
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Back to VMID #0
+ isb
+
+ pop {r2, r3}
+ hvc #0 @ Back to SVC
+ bx lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush TLBs and instruction caches of current CPU for all VMIDs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * void __kvm_flush_vm_context(void);
+ */
+ENTRY(__kvm_flush_vm_context)
+ hvc #0 @ switch to hyp-mode
+
+ mov r0, #0 @ rn parameter for c15 flushes is SBZ
+ mcr p15, 4, r0, c8, c7, 4 @ Invalidate Non-secure Non-Hyp TLB
+ mcr p15, 0, r0, c7, c5, 0 @ Invalidate instruction caches
+ dsb
+ isb
+
+ hvc #0 @ switch back to svc-mode, see hyp_svc
+ bx lr
+ENDPROC(__kvm_flush_vm_context)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor world-switch code
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/* These are simply for the macros to work - values don't have meaning */
+.equ usr, 0
+.equ svc, 1
+.equ abt, 2
+.equ und, 3
+.equ irq, 4
+.equ fiq, 5
+
+.macro store_mode_state base_reg, mode
+ .if \mode == usr
+ mrs r2, SP_usr
+ mov r3, lr
+ stmdb \base_reg!, {r2, r3}
+ .elseif \mode != fiq
+ mrs r2, SP_\mode
+ mrs r3, LR_\mode
+ mrs r4, SPSR_\mode
+ stmdb \base_reg!, {r2, r3, r4}
+ .else
+ mrs r2, r8_fiq
+ mrs r3, r9_fiq
+ mrs r4, r10_fiq
+ mrs r5, r11_fiq
+ mrs r6, r12_fiq
+ mrs r7, SP_fiq
+ mrs r8, LR_fiq
+ mrs r9, SPSR_fiq
+ stmdb \base_reg!, {r2-r9}
+ .endif
+.endm
+
+.macro load_mode_state base_reg, mode
+ .if \mode == usr
+ ldmia \base_reg!, {r2, r3}
+ msr SP_usr, r2
+ mov lr, r3
+ .elseif \mode != fiq
+ ldmia \base_reg!, {r2, r3, r4}
+ msr SP_\mode, r2
+ msr LR_\mode, r3
+ msr SPSR_\mode, r4
+ .else
+ ldmia \base_reg!, {r2-r9}
+ msr r8_fiq, r2
+ msr r9_fiq, r3
+ msr r10_fiq, r4
+ msr r11_fiq, r5
+ msr r12_fiq, r6
+ msr SP_fiq, r7
+ msr LR_fiq, r8
+ msr SPSR_fiq, r9
+ .endif
+.endm
+
+/* Reads cp15 registers from hardware and stores them in memory
+ * @vcpu: If 0, registers are written in-order to the stack,
+ * otherwise to the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
+ */
+.macro read_cp15_state vcpu=0, vcpup
+ mrc p15, 0, r2, c1, c0, 0 @ SCTLR
+ mrc p15, 0, r3, c1, c0, 2 @ CPACR
+ mrc p15, 0, r4, c2, c0, 2 @ TTBCR
+ mrc p15, 0, r5, c3, c0, 0 @ DACR
+ mrrc p15, 0, r6, r7, c2 @ TTBR 0
+ mrrc p15, 1, r8, r9, c2 @ TTBR 1
+ mrc p15, 0, r10, c10, c2, 0 @ PRRR
+ mrc p15, 0, r11, c10, c2, 1 @ NMRR
+
+ .if \vcpu == 0
+ push {r2-r11} @ Push CP15 registers
+ .else
+ str r2, [\vcpup, #VCPU_SCTLR]
+ str r3, [\vcpup, #VCPU_CPACR]
+ str r4, [\vcpup, #VCPU_TTBCR]
+ str r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ strd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ strd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ str r10, [\vcpup, #VCPU_PRRR]
+ str r11, [\vcpup, #VCPU_NMRR]
+ .endif
+
+ mrc p15, 0, r2, c13, c0, 1 @ CID
+ mrc p15, 0, r3, c13, c0, 2 @ TID_URW
+ mrc p15, 0, r4, c13, c0, 3 @ TID_URO
+ mrc p15, 0, r5, c13, c0, 4 @ TID_PRIV
+ mrc p15, 0, r6, c5, c0, 0 @ DFSR
+ mrc p15, 0, r7, c5, c0, 1 @ IFSR
+ mrc p15, 0, r8, c5, c1, 0 @ ADFSR
+ mrc p15, 0, r9, c5, c1, 1 @ AIFSR
+ mrc p15, 0, r10, c6, c0, 0 @ DFAR
+ mrc p15, 0, r11, c6, c0, 2 @ IFAR
+ mrc p15, 0, r12, c12, c0, 0 @ VBAR
+
+ .if \vcpu == 0
+ push {r2-r12} @ Push CP15 registers
+ .else
+ str r2, [\vcpup, #VCPU_CID]
+ str r3, [\vcpup, #VCPU_TID_URW]
+ str r4, [\vcpup, #VCPU_TID_URO]
+ str r5, [\vcpup, #VCPU_TID_PRIV]
+ str r6, [\vcpup, #VCPU_DFSR]
+ str r7, [\vcpup, #VCPU_IFSR]
+ str r8, [\vcpup, #VCPU_ADFSR]
+ str r9, [\vcpup, #VCPU_AIFSR]
+ str r10, [\vcpup, #VCPU_DFAR]
+ str r11, [\vcpup, #VCPU_IFAR]
+ str r12, [\vcpup, #VCPU_VBAR]
+ .endif
+.endm
+
+/* Reads cp15 registers from memory and writes them to hardware
+ * @vcpu: If 0, registers are read in-order from the stack,
+ * otherwise from the VCPU struct pointed to by vcpup
+ * @vcpup: Register pointing to VCPU struct
+ */
+.macro write_cp15_state vcpu=0, vcpup
+ .if \vcpu == 0
+ pop {r2-r12}
+ .else
+ ldr r2, [\vcpup, #VCPU_CID]
+ ldr r3, [\vcpup, #VCPU_TID_URW]
+ ldr r4, [\vcpup, #VCPU_TID_URO]
+ ldr r5, [\vcpup, #VCPU_TID_PRIV]
+ ldr r6, [\vcpup, #VCPU_DFSR]
+ ldr r7, [\vcpup, #VCPU_IFSR]
+ ldr r8, [\vcpup, #VCPU_ADFSR]
+ ldr r9, [\vcpup, #VCPU_AIFSR]
+ ldr r10, [\vcpup, #VCPU_DFAR]
+ ldr r11, [\vcpup, #VCPU_IFAR]
+ ldr r12, [\vcpup, #VCPU_VBAR]
+ .endif
+
+ mcr p15, 0, r2, c13, c0, 1 @ CID
+ mcr p15, 0, r3, c13, c0, 2 @ TID_URW
+ mcr p15, 0, r4, c13, c0, 3 @ TID_URO
+ mcr p15, 0, r5, c13, c0, 4 @ TID_PRIV
+ mcr p15, 0, r6, c5, c0, 0 @ DFSR
+ mcr p15, 0, r7, c5, c0, 1 @ IFSR
+ mcr p15, 0, r8, c5, c1, 0 @ ADFSR
+ mcr p15, 0, r9, c5, c1, 1 @ AIFSR
+ mcr p15, 0, r10, c6, c0, 0 @ DFAR
+ mcr p15, 0, r11, c6, c0, 2 @ IFAR
+ mcr p15, 0, r12, c12, c0, 0 @ VBAR
+
+ .if \vcpu == 0
+ pop {r2-r11}
+ .else
+ ldr r2, [\vcpup, #VCPU_SCTLR]
+ ldr r3, [\vcpup, #VCPU_CPACR]
+ ldr r4, [\vcpup, #VCPU_TTBCR]
+ ldr r5, [\vcpup, #VCPU_DACR]
+ add \vcpup, \vcpup, #VCPU_TTBR0
+ ldrd r6, r7, [\vcpup]
+ add \vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+ ldrd r8, r9, [\vcpup]
+ sub \vcpup, \vcpup, #(VCPU_TTBR1)
+ ldr r10, [\vcpup, #VCPU_PRRR]
+ ldr r11, [\vcpup, #VCPU_NMRR]
+ .endif
+
+ mcr p15, 0, r2, c1, c0, 0 @ SCTLR
+ mcr p15, 0, r3, c1, c0, 2 @ CPACR
+ mcr p15, 0, r4, c2, c0, 2 @ TTBCR
+ mcr p15, 0, r5, c3, c0, 0 @ DACR
+ mcrr p15, 0, r6, r7, c2 @ TTBR 0
+ mcrr p15, 1, r8, r9, c2 @ TTBR 1
+ mcr p15, 0, r10, c10, c2, 0 @ PRRR
+ mcr p15, 0, r11, c10, c2, 1 @ NMRR
+.endm
+
+/* Configures the HSTR (Hyp System Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hstr entry
+ mrc p15, 4, r2, c1, c1, 3
+ ldr r3, =(HSTR_T(9) | HSTR_T(10) | HSTR_T(11) | HSTR_T(15))
+ .if \entry == 1
+ orr r2, r2, r3 @ Trap CR{9,10,11,15}
+ .else
+ bic r2, r2, r3 @ Don't trap any CRx accesses
+ .endif
+ mcr p15, 4, r2, c1, c1, 3
+.endm
+
+/* Configures the HCPTR (Hyp Coprocessor Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hcptr entry
+ mrc p15, 4, r2, c1, c1, 2
+ ldr r3, =(HCPTR_TTA)
+ .if \entry == 1
+ orr r2, r2, r3 @ Trap some coproc-accesses
+ .else
+ bic r2, r2, r3 @ Don't trap any coprocessor accesses
+ .endif
+ mcr p15, 4, r2, c1, c1, 2
+.endm
+
+/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi, trap smc */
+.macro configure_hyp_role entry, vcpu_ptr
+ mrc p15, 4, r2, c1, c1, 0 @ HCR
+ bic r2, r2, #HCR_VIRT_EXCP_MASK
+ ldr r3, =HCR_GUEST_MASK
+ .if \entry == 1
+ orr r2, r2, r3
+ ldr r3, [\vcpu_ptr, #VCPU_IRQ_LINES]
+ orr r2, r2, r3
+ .else
+ bic r2, r2, r3
+ .endif
+ mcr p15, 4, r2, c1, c1, 0
+.endm
+
+@ Arguments:
+@ r0: pointer to vcpu struct
+ENTRY(__kvm_vcpu_run)
+ hvc #0 @ switch to hyp-mode
+
+ @ Now we're in Hyp-mode and lr_usr, spsr_hyp are on the stack
+ mrs r2, sp_usr
+ push {r2} @ Push r13_usr
+ push {r4-r12} @ Push r4-r12
+
+ store_mode_state sp, svc
+ store_mode_state sp, abt
+ store_mode_state sp, und
+ store_mode_state sp, irq
+ store_mode_state sp, fiq
+
+ @ Store hardware CP15 state and load guest state
+ read_cp15_state
+ write_cp15_state 1, r0
+
+ push {r0} @ Push the VCPU pointer
+
+ @ Configure Hyp-role
+ configure_hyp_role 1, r0
+
+ @ Trap coprocessor CRx accesses
+ set_hstr 1
+ set_hcptr 1
+
+ @ Write configured ID register into MIDR alias
+ ldr r1, [r0, #VCPU_MIDR]
+ mcr p15, 4, r1, c0, c0, 0
+
+ @ Write guest view of MPIDR into VMPIDR
+ ldr r1, [r0, #VCPU_MPIDR]
+ mcr p15, 4, r1, c0, c0, 5
+
+ @ Load guest registers
+ add r0, r0, #(VCPU_USR_SP)
+ load_mode_state r0, usr
+ load_mode_state r0, svc
+ load_mode_state r0, abt
+ load_mode_state r0, und
+ load_mode_state r0, irq
+ load_mode_state r0, fiq
+
+ @ Load return state (r0 now points to vcpu->arch.regs.pc)
+ ldmia r0, {r2, r3}
+ msr ELR_hyp, r2
+ msr SPSR_cxsf, r3
+
+ @ Set up guest memory translation
+ sub r1, r0, #(VCPU_PC - VCPU_KVM) @ r1 points to kvm struct
+ ldr r1, [r1]
+ add r1, r1, #KVM_VTTBR
+ ldrd r2, r3, [r1]
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Load remaining registers and do the switch
+ sub r0, r0, #(VCPU_PC - VCPU_USR_REGS)
+ ldmia r0, {r0-r12}
+ eret
+
+__kvm_vcpu_return:
+ @ Set VMID == 0
+ mov r2, #0
+ mov r3, #0
+ mcrr p15, 6, r2, r3, c2 @ Write VTTBR
+
+ @ Store return state
+ mrs r2, ELR_hyp
+ mrs r3, spsr
+ str r2, [r1, #VCPU_PC]
+ str r3, [r1, #VCPU_CPSR]
+
+ @ Store guest registers
+ add r1, r1, #(VCPU_FIQ_SPSR + 4)
+ store_mode_state r1, fiq
+ store_mode_state r1, irq
+ store_mode_state r1, und
+ store_mode_state r1, abt
+ store_mode_state r1, svc
+ store_mode_state r1, usr
+ sub r1, r1, #(VCPU_USR_REG(13))
+
+ @ Don't trap coprocessor accesses for host kernel
+ set_hstr 0
+ set_hcptr 0
+
+ @ Reset Hyp-role
+ configure_hyp_role 0, r1
+
+ @ Let host read hardware MIDR
+ mrc p15, 0, r2, c0, c0, 0
+ mcr p15, 4, r2, c0, c0, 0
+
+ @ Back to hardware MPIDR
+ mrc p15, 0, r2, c0, c0, 5
+ mcr p15, 4, r2, c0, c0, 5
+
+ @ Store guest CP15 state and restore host state
+ read_cp15_state 1, r1
+ write_cp15_state
+
+ load_mode_state sp, fiq
+ load_mode_state sp, irq
+ load_mode_state sp, und
+ load_mode_state sp, abt
+ load_mode_state sp, svc
+
+ pop {r4-r12} @ Pop r4-r12
+ pop {r2} @ Pop r13_usr
+ msr sp_usr, r2
+
+ ldr r2, =(~PAGE_MASK) @ Get svc-cpsr in case we need it for
+ mov r1, sp @ the __irq_svc call
+ tst r1, r2
+ subeq r2, r1, #0x1000
+ bicne r2, r1, r2
+ ldr r2, [r2, #4] @ r2 = svc_cpsr
+
+ hvc #0 @ switch back to svc-mode, see hyp_svc
+
+ cmp r0, #ARM_EXCEPTION_IRQ
+ bxne lr @ return to IOCTL
+
+ /*
+ * It's time to launch the kernel IRQ handler for IRQ exceptions. This
+ * requires some manipulation though.
+ *
+ * - The easiest entry point to the host handler is __irq_svc.
+ * - The __irq_svc expects to be called from SVC mode, which has been
+ * switched to from vector_stub code in entry-armv.S. The __irq_svc
+ * calls svc_entry which uses values stored in memory and pointed to
+ * by r0 to return from handler. We allocate this memory on the
+ * stack, which will contain these values:
+ * 0x8: cpsr
+ * 0x4: return_address
+ * 0x0: r0
+ */
+ adr r1, irq_kernel_resume @ Where to resume
+ push {r0 - r2}
+ mov r0, sp
+ b __irq_svc
+
+irq_kernel_resume:
+ pop {r0}
+ add sp, sp, #8
+ bx lr @ return to IOCTL
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Hypervisor exception vector and handlers
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+/*
+ * The KVM/ARM Hypervisor ABI is defined as follows:
+ *
+ * Entry to Hyp mode from the host kernel will happen _only_ when an HVC
+ * instruction is issued since all traps are disabled when running the host
+ * kernel as per the Hyp-mode initialization at boot time.
+ *
+ * HVC instructions cause a trap to the vector page + offset 0x18 (see hyp_hvc
+ * below) when the HVC instruction is called from SVC mode (i.e. a guest or the
+ * host kernel) and they cause a trap to the vector page + offset 0xc when HVC
+ * instructions are called from within Hyp-mode.
+ *
+ * Hyp-ABI: Switching from host kernel to Hyp-mode:
+ * Switching to Hyp mode is done through a simple HVC instructions. The
+ * exception vector code will check that the HVC comes from VMID==0 and if
+ * so will store the necessary state on the Hyp stack, which will look like
+ * this (growing downwards, see the hyp_hvc handler):
+ * ...
+ * stack_page + 4: spsr (Host-SVC cpsr)
+ * stack_page : lr_usr
+ * --------------: stack bottom
+ *
+ * Hyp-ABI: Switching from Hyp-mode to host kernel SVC mode:
+ * When returning from Hyp mode to SVC mode, another HVC instruction is
+ * executed from Hyp mode, which is taken in the hyp_svc handler. The
+ * bottom of the Hyp is derived from the Hyp stack pointer (only a single
+ * page aligned stack is used per CPU) and the initial SVC registers are
+ * used to restore the host state.
+ *
+ *
+ * Note that the above is used to execute code in Hyp-mode from a host-kernel
+ * point of view, and is a different concept from performing a world-switch and
+ * executing guest code SVC mode (with a VMID != 0).
+ */
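In C terms, the state that hyp_hvc parks at the bottom of the per-CPU Hyp stack page is simply (a descriptive sketch; the authoritative layout is the assembly below):

    struct hyp_stack_bottom {   /* at stack_page; sp grows down from +PAGE_SIZE */
            u32 lr_usr;         /* stack_page + 0 */
            u32 host_svc_cpsr;  /* stack_page + 4, the SPSR seen in Hyp */
    };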
+
+@ Handle undef, svc, pabt, or dabt by crashing with a user notice
+.macro bad_exception exception_code, panic_str
+ mrrc p15, 6, r2, r3, c2 @ Read VTTBR
+ lsr r3, r3, #16
+ ands r3, r3, #0xff
+
+ @ COND:neq means we're probably in the guest and we can try fetching
+ @ the vcpu pointer and stuff off the stack and keep our fingers crossed
+ beq 99f
+ mov r0, #\exception_code
+ pop {r1} @ Load VCPU pointer
+ .if \exception_code == ARM_EXCEPTION_DATA_ABORT
+ mrc p15, 4, r2, c5, c2, 0 @ HSR
+ mrc p15, 4, r3, c6, c0, 0 @ HDFAR
+ str r2, [r1, #VCPU_HSR]
+ str r3, [r1, #VCPU_HDFAR]
+ .endif
+ .if \exception_code == ARM_EXCEPTION_PREF_ABORT
+ mrc p15, 4, r2, c5, c2, 0 @ HSR
+ mrc p15, 4, r3, c6, c0, 2 @ HIFAR
+ str r2, [r1, #VCPU_HSR]
+ str r3, [r1, #VCPU_HIFAR]
+ .endif
+ mrs r2, ELR_hyp
+ str r2, [r1, #VCPU_HYP_PC]
+ b __kvm_vcpu_return
+
+ @ We were in the host already
+99: hvc #0 @ switch to SVC mode
+ ldr r0, \panic_str
+ mrs r1, ELR_hyp
+ b panic
+
+.endm
+
+ .text
+
+ .align 5
+__kvm_hyp_vector:
+ .globl __kvm_hyp_vector
+
+ @ Hyp-mode exception vector
+ W(b) hyp_reset
+ W(b) hyp_undef
+ W(b) hyp_svc
+ W(b) hyp_pabt
+ W(b) hyp_dabt
+ W(b) hyp_hvc
+ W(b) hyp_irq
+ W(b) hyp_fiq
+
+ .align
+hyp_reset:
+ b hyp_reset
+
+ .align
+hyp_undef:
+ bad_exception ARM_EXCEPTION_UNDEFINED, und_die_str
+
+ .align
+hyp_svc:
+ @ Can only get here if HVC or SVC is called from Hyp mode, which means
+ @ we want to change mode back to SVC mode.
+ push {r12}
+ mov r12, sp
+ bic r12, r12, #0x0ff
+ bic r12, r12, #0xf00
+ ldr lr, [r12, #4]
+ msr SPSR_cxsf, lr
+ ldr lr, [r12]
+ pop {r12}
+ eret
+
+ .align
+hyp_pabt:
+ bad_exception ARM_EXCEPTION_PREF_ABORT, pabt_die_str
+
+ .align
+hyp_dabt:
+ bad_exception ARM_EXCEPTION_DATA_ABORT, dabt_die_str
+
+ .align
+hyp_hvc:
+ @ Getting here is either because of a trap from a guest or from calling
+ @ HVC from the host kernel, which means "switch to Hyp mode".
+ push {r0, r1, r2}
+
+ @ Check syndrome register
+ mrc p15, 4, r0, c5, c2, 0 @ HSR
+ lsr r1, r0, #HSR_EC_SHIFT
+ cmp r1, #HSR_EC_HVC
+ bne guest_trap @ Not HVC instr.
+
+ @ Let's check if the HVC came from VMID 0 and allow simple
+ @ switch to Hyp mode
+ mrrc p15, 6, r1, r2, c2
+ lsr r2, r2, #16
+ and r2, r2, #0xff
+ cmp r2, #0
+ bne guest_trap @ Guest called HVC
+
+ @ Store lr_usr,spsr (svc cpsr) on bottom of stack
+ mov r1, sp
+ bic r1, r1, #0x0ff
+ bic r1, r1, #0xf00
+ str lr, [r1]
+ mrs lr, spsr
+ str lr, [r1, #4]
+
+ pop {r0, r1, r2}
+
+ @ Return to caller in Hyp mode
+ mrs lr, ELR_hyp
+ mov pc, lr
+
+guest_trap:
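+	@ r0 still holds the HSR read above; the guest's r0-r2 sit on the
+	@ stack just above the VCPU pointer (loaded from sp + 12 below).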
+ ldr r1, [sp, #12] @ Load VCPU pointer
+ str r0, [r1, #VCPU_HSR]
+ add r1, r1, #VCPU_USR_REG(3)
+ stmia r1, {r3-r12}
+ sub r1, r1, #(VCPU_USR_REG(3) - VCPU_USR_REG(0))
+ pop {r3, r4, r5}
+ add sp, sp, #4 @ We loaded the VCPU pointer above
+ stmia r1, {r3, r4, r5}
+ sub r1, r1, #VCPU_USR_REG(0)
+
+ @ Check if we need the fault information
+ lsr r2, r0, #HSR_EC_SHIFT
+ cmp r2, #HSR_EC_IABT
+ beq 2f
+ cmpne r2, #HSR_EC_DABT
+ bne 1f
+
+ @ For non-valid data aborts, get the offending instr. PA
+ lsr r2, r0, #HSR_ISV_SHIFT
+ ands r2, r2, #1
+ bne 2f
+ mrs r3, ELR_hyp
+ mcr p15, 0, r3, c7, c8, 0 @ VA to PA, ATS1CPR
+ mrrc p15, 0, r4, r5, c7 @ PAR
+ add r6, r1, #VCPU_PC_IPA
+ strd r4, r5, [r6]
+
+ @ Check if we might have a wide thumb instruction spill-over
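+	@ (a 32-bit Thumb instruction starting on the last halfword of a page
+	@ continues on the next page, so its second half needs translating too)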
+ ldr r5, =0xfff
+ bic r4, r3, r5 @ clear page mask
+	sub	r5, r5, #1		@ last 2-byte page boundary, 0xffe
+ cmp r4, r5
+ bne 2f
+ add r4, r3, #2 @ _really_ unlikely!
+ mcr p15, 0, r4, c7, c8, 0 @ VA to PA, ATS1CPR
+ mrrc p15, 0, r4, r5, c7 @ PAR
+ add r6, r1, #VCPU_PC_IPA2
+ strd r4, r5, [r6]
+
+2: mrc p15, 4, r2, c6, c0, 0 @ HDFAR
+ mrc p15, 4, r3, c6, c0, 2 @ HIFAR
+ mrc p15, 4, r4, c6, c0, 4 @ HPFAR
+ add r5, r1, #VCPU_HDFAR
+ stmia r5, {r2, r3, r4}
+
+1: mov r0, #ARM_EXCEPTION_HVC
+ b __kvm_vcpu_return
+
+ .align
+hyp_irq:
+ push {r0}
+ ldr r0, [sp, #4] @ Load VCPU pointer
+ add r0, r0, #(VCPU_USR_REG(1))
+ stmia r0, {r1-r12}
+ pop {r0, r1} @ r1 == vcpu pointer
+ str r0, [r1, #VCPU_USR_REG(0)]
+
+ mov r0, #ARM_EXCEPTION_IRQ
+ b __kvm_vcpu_return
+
+ .align
+hyp_fiq:
+ b hyp_fiq
+
+ .ltorg
+
+und_die_str:
+ .ascii "unexpected undefined exception in Hyp mode at: %#08x"
+pabt_die_str:
+ .ascii "unexpected prefetch abort in Hyp mode at: %#08x"
+dabt_die_str:
+ .ascii "unexpected data abort in Hyp mode at: %#08x"
+
+/*
+ * The lines below make sure the HYP mode code fits in a single page (the
+ * assembler will bark at you if it doesn't). Please keep them together. If
+ * you plan to restructure the code or increase its size over a page, you'll
+ * have to fix the code in init_hyp_mode().
+ */
+__kvm_hyp_code_end:
+ .globl __kvm_hyp_code_end
+
+ .org __kvm_hyp_code_start + PAGE_SIZE
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
new file mode 100644
index 0000000000000..8c303769a863a
--- /dev/null
+++ b/arch/arm/kvm/mmu.c
@@ -0,0 +1,644 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/mman.h>
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+#include <asm/idmap.h>
+#include <asm/pgalloc.h>
+#include <asm/kvm_arm.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+
+#include "trace.h"
+
+static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
+
+static void free_ptes(pmd_t *pmd, unsigned long addr)
+{
+ pte_t *pte;
+ unsigned int i;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+ if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr);
+ pte_free_kernel(NULL, pte);
+ }
+ pmd++;
+ }
+}
+
+/**
+ * free_hyp_pmds - free the Hyp-mode level-2 tables and child level-3 tables
+ *
+ * Assumes this is a page table used strictly in Hyp-mode and therefore contains
+ * only mappings in the kernel memory area, which is above PAGE_OFFSET.
+ */
+void free_hyp_pmds(void)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
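+	/* addr wraps to 0 after the last kernel PGD, ending the loop */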
+ for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
+ pgd = hyp_pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none(*pud))
+ continue;
+ BUG_ON(pud_bad(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ free_ptes(pmd, addr);
+ pmd_free(NULL, pmd);
+ pud_clear(pud);
+ }
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+}
+
+static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
+ unsigned long end)
+{
+ pte_t *pte;
+ struct page *page;
+ unsigned long addr;
+
+ for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+ pte = pte_offset_kernel(pmd, addr);
+ BUG_ON(!virt_addr_valid(addr));
+ page = virt_to_page(addr);
+
+ set_pte_ext(pte, mk_pte(page, PAGE_HYP), 0);
+ }
+}
+
+static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
+ unsigned long end)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long addr, next;
+
+ for (addr = start; addr < end; addr = next) {
+ pmd = pmd_offset(pud, addr);
+
+ BUG_ON(pmd_sect(*pmd));
+
+ if (pmd_none(*pmd)) {
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err("Cannot allocate Hyp pte\n");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ }
+
+ next = pmd_addr_end(addr, end);
+ create_hyp_pte_mappings(pmd, addr, next);
+ }
+
+ return 0;
+}
+
+/**
+ * create_hyp_mappings - map a kernel virtual address range in Hyp mode
+ * @from: The virtual kernel start address of the range
+ * @to: The virtual kernel end address of the range (exclusive)
+ *
+ * The Hyp-mode mapping uses the same virtual addresses as the kernel mapping
+ * and points to the same underlying physical pages.
+ *
+ * Note: Wrapping around zero in the "to" address is not supported.
+ */
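+/*
+ * A hypothetical usage sketch (assuming the caller wants the Hyp-mode code
+ * itself mapped): create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end)
+ */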
+int create_hyp_mappings(void *from, void *to)
+{
+ unsigned long start = (unsigned long)from;
+ unsigned long end = (unsigned long)to;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long addr, next;
+ int err = 0;
+
+ BUG_ON(start > end);
+ if (start < PAGE_OFFSET)
+ return -EINVAL;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+ for (addr = start; addr < end; addr = next) {
+ pgd = hyp_pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none_or_clear_bad(pud)) {
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate Hyp pmd\n");
+ err = -ENOMEM;
+ goto out;
+ }
+ pud_populate(NULL, pud, pmd);
+ }
+
+ next = pgd_addr_end(addr, end);
+ err = create_hyp_pmd_mappings(pud, addr, next);
+ if (err)
+ goto out;
+ }
+out:
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+ return err;
+}
+
+/**
+ * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Allocates only the 1st level table, whose size is defined by PGD2_ORDER
+ * (it can support either full 40-bit input addresses or be limited to
+ * 32-bit input addresses). Clears the allocated pages.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * created, which can only be done once.
+ */
+int kvm_alloc_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+
+ if (kvm->arch.pgd != NULL) {
+ kvm_err("kvm_arch already initialized?\n");
+ return -EINVAL;
+ }
+
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD2_ORDER);
+ if (!pgd)
+ return -ENOMEM;
+
+ memset(pgd, 0, PTRS_PER_PGD2 * sizeof(pgd_t));
+ kvm->arch.pgd = pgd;
+
+ return 0;
+}
+
+static void free_guest_pages(pte_t *pte, unsigned long addr)
+{
+ unsigned int i;
+ struct page *page;
+
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ if (pte_present(*pte)) {
+ page = pfn_to_page(pte_pfn(*pte));
+ put_page(page);
+ }
+ pte++;
+ }
+}
+
+static void free_stage2_ptes(pmd_t *pmd, unsigned long addr)
+{
+ unsigned int i;
+ pte_t *pte;
+ struct page *page;
+
+ for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
+ BUG_ON(pmd_sect(*pmd));
+ if (!pmd_none(*pmd) && pmd_table(*pmd)) {
+ pte = pte_offset_kernel(pmd, addr);
+ free_guest_pages(pte, addr);
+ page = virt_to_page((void *)pte);
+ WARN_ON(atomic_read(&page->_count) != 1);
+ pte_free_kernel(NULL, pte);
+ }
+ pmd++;
+ }
+}
+
+/**
+ * kvm_free_stage2_pgd - free all stage-2 tables
+ * @kvm: The KVM struct pointer for the VM.
+ *
+ * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
+ * underlying level-2 and level-3 tables before freeing the actual level-1 table
+ * and setting the struct pointer to NULL.
+ *
+ * Note we don't need locking here as this is only called when the VM is
+ * destroyed, which can only be done once.
+ */
+void kvm_free_stage2_pgd(struct kvm *kvm)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long long i, addr;
+
+ if (kvm->arch.pgd == NULL)
+ return;
+
+ /*
+	 * We do this slightly differently than in other places, since we need
+	 * more than 32 bits and, for instance, pgd_addr_end converts to
+	 * unsigned long.
+ */
+ addr = 0;
+ for (i = 0; i < PTRS_PER_PGD2; i++) {
+ addr = i * (unsigned long long)PGDIR_SIZE;
+ pgd = kvm->arch.pgd + i;
+ pud = pud_offset(pgd, addr);
+
+ if (pud_none(*pud))
+ continue;
+
+ BUG_ON(pud_bad(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ free_stage2_ptes(pmd, addr);
+ pmd_free(NULL, pmd);
+ }
+
+ free_pages((unsigned long)kvm->arch.pgd, PGD2_ORDER);
+ kvm->arch.pgd = NULL;
+}
+
+static const pte_t null_pte;
+
+static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr,
+ const pte_t *new_pte)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /* Create 2nd stage page table mapping - Level 1 */
+ pgd = kvm->arch.pgd + pgd_index(addr);
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud)) {
+ BUG_ON(new_pte == &null_pte);
+ pmd = pmd_alloc_one(NULL, addr);
+ if (!pmd) {
+ kvm_err("Cannot allocate 2nd stage pmd\n");
+ return -ENOMEM;
+ }
+ pud_populate(NULL, pud, pmd);
+ pmd += pmd_index(addr);
+ } else
+ pmd = pmd_offset(pud, addr);
+
+ /* Create 2nd stage page table mapping - Level 2 */
+ if (pmd_none(*pmd)) {
+ BUG_ON(new_pte == &null_pte);
+ pte = pte_alloc_one_kernel(NULL, addr);
+ if (!pte) {
+ kvm_err("Cannot allocate 2nd stage pte\n");
+ return -ENOMEM;
+ }
+ pmd_populate_kernel(NULL, pmd, pte);
+ pte += pte_index(addr);
+ } else
+ pte = pte_offset_kernel(pmd, addr);
+
+ /* Create 2nd stage page table mapping - Level 3 */
+ set_pte_ext(pte, *new_pte, 0);
+
+ return 0;
+}
+
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+ gfn_t gfn, struct kvm_memory_slot *memslot,
+ bool is_iabt)
+{
+ pte_t new_pte;
+ pfn_t pfn;
+ int ret;
+ bool write_fault, writable;
+
+ /* TODO: Use instr. decoding for non-ISV to determine r/w fault */
+ if (is_iabt)
+ write_fault = false;
+ else if ((vcpu->arch.hsr & HSR_ISV) && !(vcpu->arch.hsr & HSR_WNR))
+ write_fault = false;
+ else
+ write_fault = true;
+
+ if ((vcpu->arch.hsr & HSR_FSC_TYPE) == FSC_PERM && !write_fault) {
+ kvm_err("Unexpected L2 read permission error\n");
+ return -EFAULT;
+ }
+
+ /* preemption disabled for handle_exit, gfn_to_pfn may sleep */
+ preempt_enable();
+ pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
+ preempt_disable();
+
+ if (is_error_pfn(pfn)) {
+ put_page(pfn_to_page(pfn));
+ kvm_err("No host mapping: gfn %u (0x%08x)\n",
+ (unsigned int)gfn,
+ (unsigned int)gfn << PAGE_SHIFT);
+ return -EFAULT;
+ }
+
+ mutex_lock(&vcpu->kvm->arch.pgd_mutex);
+ new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+ if (writable)
+ new_pte |= L_PTE2_WRITE;
+ ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
+ if (ret)
+ put_page(pfn_to_page(pfn));
+ mutex_unlock(&vcpu->kvm->arch.pgd_mutex);
+
+ return ret;
+}
+
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ *
+ * This should only be called after returning from userspace for MMIO load
+ * emulation.
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ int *dest;
+ unsigned int len;
+ int mask;
+
+ if (!run->mmio.is_write) {
+ dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd);
+ memset(dest, 0, sizeof(int));
+
+ len = run->mmio.len;
+ if (len > 4)
+ return -EINVAL;
+
+ memcpy(dest, run->mmio.data, len);
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+ *((u64 *)run->mmio.data));
+
+ if (vcpu->arch.mmio_sign_extend && len < 4) {
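+			/* (*dest ^ mask) - mask sign-extends *dest upwards */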
+ mask = 1U << ((len * 8) - 1);
+ *dest = (*dest ^ mask) - mask;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * invalid_io_mem_abort -- Handle I/O aborts when the ISV bit is clear
+ *
+ * @vcpu: The vcpu pointer
+ * @fault_ipa: The IPA that caused the 2nd stage fault
+ *
+ * Some load/store instructions cannot be emulated using the information
+ * presented in the HSR; for instance, register write-back instructions are not
+ * supported. We therefore need to fetch the instruction, decode it, and then
+ * emulate its behavior.
+ */
+static int invalid_io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+ unsigned long instr;
+ phys_addr_t pc_ipa;
+
+ if (vcpu->arch.pc_ipa & 1) {
+ kvm_err("I/O Abort from invalid instruction address? Wrong!\n");
+ return -EINVAL;
+ }
+
+ if (vcpu->arch.pc_ipa & (1U << 11)) {
+ /* LPAE PAR format */
+ /* TODO: Check if this ever happens - called from Hyp mode */
+ pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 32) - 1);
+ } else {
+ /* VMSAv7 PAR format */
+ pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 40) - 1);
+ }
+ pc_ipa += vcpu->arch.regs.pc & ~PAGE_MASK;
+
+ if (vcpu->arch.regs.cpsr & PSR_T_BIT) {
+ /* TODO: Check validity of PC IPA and IPA2!!! */
+ /* Need to decode thumb instructions as well */
+ kvm_err("Thumb guest support not there yet :(\n");
+ return -EINVAL;
+ }
+
+ if (kvm_read_guest(vcpu->kvm, pc_ipa, &instr, sizeof(instr))) {
+ kvm_err("Could not copy guest instruction\n");
+ return -EFAULT;
+ }
+
+ return kvm_emulate_mmio_ls(vcpu, fault_ipa, instr);
+}
+
+static int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ phys_addr_t fault_ipa, struct kvm_memory_slot *memslot)
+{
+ unsigned long rd, len, instr_len;
+ bool is_write, sign_extend;
+
+ if (!(vcpu->arch.hsr & HSR_ISV))
+ return invalid_io_mem_abort(vcpu, fault_ipa);
+
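+	/* HSR bit 8 is the CM flag: the abort came from a cache maintenance op */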
+ if (((vcpu->arch.hsr >> 8) & 1)) {
+ kvm_err("Not supported, Cache operation on I/O addr.\n");
+ return -EFAULT;
+ }
+
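+	/* HSR bit 7 is the S1PTW flag: the abort came from a stage 1 table walk */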
+ if ((vcpu->arch.hsr >> 7) & 1) {
+ kvm_err("Translation table accesses I/O memory\n");
+ return -EFAULT;
+ }
+
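+	/* HSR.SAS (bits 23:22) encodes the access size for a valid syndrome */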
+ switch ((vcpu->arch.hsr >> 22) & 0x3) {
+ case 0:
+ len = 1;
+ break;
+ case 1:
+ len = 2;
+ break;
+ case 2:
+ len = 4;
+ break;
+ default:
+ kvm_err("Invalid I/O abort\n");
+ return -EFAULT;
+ }
+
+ is_write = vcpu->arch.hsr & HSR_WNR;
+ sign_extend = vcpu->arch.hsr & HSR_SSE;
+ rd = (vcpu->arch.hsr & HSR_SRT_MASK) >> HSR_SRT_SHIFT;
+ BUG_ON(rd > 15);
+
+ if (rd == 15) {
+ kvm_err("I/O memory trying to read/write pc\n");
+ return -EFAULT;
+ }
+
+ /* Get instruction length in bytes */
+ instr_len = (vcpu->arch.hsr & HSR_IL) ? 4 : 2;
+
+ /* Export MMIO operations to user space */
+ run->mmio.is_write = is_write;
+ run->mmio.phys_addr = fault_ipa;
+ run->mmio.len = len;
+ vcpu->arch.mmio_sign_extend = sign_extend;
+ vcpu->arch.mmio_rd = rd;
+
+ trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+ KVM_TRACE_MMIO_READ_UNSATISFIED,
+ len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+ if (is_write)
+ memcpy(run->mmio.data, vcpu_reg(vcpu, rd), len);
+
+ /*
+ * The MMIO instruction is emulated and should not be re-executed
+ * in the guest.
+ */
+ *vcpu_pc(vcpu) += instr_len;
+ kvm_adjust_itstate(vcpu);
+ vcpu->stat.mmio_exits++;
+ return KVM_EXIT_MMIO;
+}
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry. This can mean either that the
+ * guest simply needs more memory and we must allocate an appropriate page, or
+ * that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
+int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ unsigned long hsr_ec;
+ unsigned long fault_status;
+ phys_addr_t fault_ipa;
+ struct kvm_memory_slot *memslot = NULL;
+ bool is_iabt;
+ gfn_t gfn;
+
+ hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+ is_iabt = (hsr_ec == HSR_EC_IABT);
+
+ /* Check that the second stage fault is a translation fault */
+ fault_status = (vcpu->arch.hsr & HSR_FSC_TYPE);
+ if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+ kvm_err("Unsupported fault status: EC=%#lx DFCS=%#lx\n",
+ hsr_ec, fault_status);
+ return -EFAULT;
+ }
+
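+	/* HPFAR bits [31:4] hold FIPA[39:12]; shift left by 8 to get the IPA */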
+ fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+ gfn = fault_ipa >> PAGE_SHIFT;
+ if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ if (is_iabt) {
+ kvm_err("Inst. abort on I/O address %08lx\n",
+ (unsigned long)fault_ipa);
+ return -EFAULT;
+ }
+
+ /* Adjust page offset */
+ fault_ipa |= vcpu->arch.hdfar & ~PAGE_MASK;
+ return io_mem_abort(vcpu, run, fault_ipa, memslot);
+ }
+
+ memslot = gfn_to_memslot(vcpu->kvm, gfn);
+ if (!memslot->user_alloc) {
+ kvm_err("non user-alloc memslots not supported\n");
+ return -EINVAL;
+ }
+
+ return user_mem_abort(vcpu, fault_ipa, gfn, memslot, is_iabt);
+}
+
+static bool hva_to_gpa(struct kvm *kvm, unsigned long hva, gpa_t *gpa)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ bool found = false;
+
+ mutex_lock(&kvm->slots_lock);
+ slots = kvm_memslots(kvm);
+
+ /* we only care about the pages that the guest sees */
+ kvm_for_each_memslot(memslot, slots) {
+ unsigned long start = memslot->userspace_addr;
+ unsigned long end;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+ if (hva >= start && hva < end) {
+ gpa_t gpa_offset = hva - start;
+ *gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
+ found = true;
+ /* no overlapping memslots allowed: break */
+ break;
+ }
+ }
+
+ mutex_unlock(&kvm->slots_lock);
+ return found;
+}
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+ bool found;
+ gpa_t gpa;
+
+ if (!kvm->arch.pgd)
+ return 0;
+
+ mutex_lock(&kvm->arch.pgd_mutex);
+ found = hva_to_gpa(kvm, hva, &gpa);
+ if (found) {
+ stage2_set_pte(kvm, gpa, &null_pte);
+ __kvm_tlb_flush_vmid(kvm);
+ }
+ mutex_unlock(&kvm->arch.pgd_mutex);
+ return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ gpa_t gpa;
+ bool found;
+
+ if (!kvm->arch.pgd)
+ return;
+
+ mutex_lock(&kvm->arch.pgd_mutex);
+ found = hva_to_gpa(kvm, hva, &gpa);
+ if (found) {
+ stage2_set_pte(kvm, gpa, &pte);
+ /*
+		 * Ignore the return code from stage2_set_pte, since -ENOMEM
+		 * would indicate this IPA is not mapped and there is no harm
+		 * in the PTE not being changed.
+ */
+ __kvm_tlb_flush_vmid(kvm);
+ }
+ mutex_unlock(&kvm->arch.pgd_mutex);
+}
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
new file mode 100644
index 0000000000000..78488be1c4a04
--- /dev/null
+++ b/arch/arm/kvm/reset.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+
+#include <asm/unified.h>
+#include <asm/ptrace.h>
+#include <asm/cputype.h>
+#include <asm/kvm_arm.h>
+
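+/* Compile-time assert: a false expr gives a negative array size and breaks the build */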
+#define CT_ASSERT(expr, name) extern char name[(expr) ? 1 : -1]
+#define CP15_REGS_ASSERT(_array, _name) \
+ CT_ASSERT((sizeof(_array) / sizeof(_array[0])) == nr_cp15_regs, _name)
+#define UNKNOWN 0xdecafbad
+
+/******************************************************************************
+ * Cortex-A15 Register Reset Values
+ */
+
+static const int a15_max_cpu_idx = 3;
+
+static struct kvm_vcpu_regs a15_regs_reset = {
+ .cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT,
+};
+
+static u32 a15_cp15_regs_reset[][2] = {
+ { c0_MIDR, 0x412FC0F0 },
+ { c0_MPIDR, 0x00000000 }, /* see kvm_arch_vcpu_init */
+ { c1_SCTLR, 0x00C50078 },
+ { c1_ACTLR, 0x00000000 },
+ { c1_CPACR, 0x00000000 },
+ { c2_TTBR0, UNKNOWN },
+ { c2_TTBR0_high, UNKNOWN },
+ { c2_TTBR1, UNKNOWN },
+ { c2_TTBR1_high, UNKNOWN },
+ { c2_TTBCR, 0x00000000 },
+ { c3_DACR, UNKNOWN },
+ { c5_DFSR, UNKNOWN },
+ { c5_IFSR, UNKNOWN },
+ { c5_ADFSR, UNKNOWN },
+ { c5_AIFSR, UNKNOWN },
+ { c6_DFAR, UNKNOWN },
+ { c6_IFAR, UNKNOWN },
+ { c10_PRRR, 0x00098AA4 },
+ { c10_NMRR, 0x44E048E0 },
+ { c12_VBAR, 0x00000000 },
+ { c13_CID, 0x00000000 },
+ { c13_TID_URW, UNKNOWN },
+ { c13_TID_URO, UNKNOWN },
+ { c13_TID_PRIV, UNKNOWN },
+};
+CP15_REGS_ASSERT(a15_cp15_regs_reset, a15_cp15_regs_reset_init);
+
+static void a15_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Compute guest MPIDR:
+	 * (Even if we present only one VCPU to the guest on an SMP
+	 * host, we don't set the U bit in the MPIDR, or vice versa,
+	 * since revealing the underlying hardware properties is
+	 * likely to be the best choice).
+ */
+ vcpu->arch.cp15[c0_MPIDR] = (read_cpuid_mpidr() & ~MPIDR_CPUID)
+ | (vcpu->vcpu_id & MPIDR_CPUID);
+}
+
+
+/*******************************************************************************
+ * Exported reset function
+ */
+
+/**
+ * kvm_reset_vcpu - sets core registers and cp15 registers to their reset values
+ * @vcpu: The VCPU pointer
+ *
+ * This function finds the right table above and sets the registers on the
+ * virtual CPU struct to their architecturally defined reset values.
+ */
+int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
+{
+ unsigned int i;
+ struct kvm_vcpu_regs *cpu_reset;
+ u32 (*cp15_reset)[2];
+ void (*cpu_reset_vcpu)(struct kvm_vcpu *vcpu);
+
+ switch (kvm_target_cpu()) {
+ case CORTEX_A15:
+ if (vcpu->vcpu_id > a15_max_cpu_idx)
+ return -EINVAL;
+ cpu_reset = &a15_regs_reset;
+ cp15_reset = a15_cp15_regs_reset;
+ cpu_reset_vcpu = a15_reset_vcpu;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ /* Reset core registers */
+ memcpy(&vcpu->arch.regs, cpu_reset, sizeof(vcpu->arch.regs));
+
+ /* Reset CP15 registers */
+ for (i = 0; i < nr_cp15_regs; i++) {
+ if (cp15_reset[i][0] != i) {
+ kvm_err("CP15 field %d is %d, expected %d\n",
+ i, cp15_reset[i][0], i);
+ return -ENXIO;
+ }
+ vcpu->arch.cp15[i] = cp15_reset[i][1];
+ }
+
+ /* Physical CPU specific runtime reset operations */
+ cpu_reset_vcpu(vcpu);
+
+ return 0;
+}
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
new file mode 100644
index 0000000000000..28ed1a18b289e
--- /dev/null
+++ b/arch/arm/kvm/trace.h
@@ -0,0 +1,117 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for entry/exit to guest
+ */
+TRACE_EVENT(kvm_entry,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+);
+
+TRACE_EVENT(kvm_mmio_emulate,
+ TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
+ unsigned long cpsr),
+ TP_ARGS(vcpu_pc, instr, cpsr),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ __field( unsigned long, instr )
+ __field( unsigned long, cpsr )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ __entry->instr = instr;
+ __entry->cpsr = cpsr;
+ ),
+
+ TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)",
+ __entry->vcpu_pc, __entry->instr, __entry->cpsr)
+);
+
+/* Architecturally implementation defined CP15 register access */
+TRACE_EVENT(kvm_emulate_cp15_imp,
+ TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
+ unsigned long CRm, unsigned long Op2, bool is_write),
+ TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, Op1 )
+ __field( unsigned int, Rt1 )
+ __field( unsigned int, CRn )
+ __field( unsigned int, CRm )
+ __field( unsigned int, Op2 )
+ __field( bool, is_write )
+ ),
+
+ TP_fast_assign(
+ __entry->is_write = is_write;
+ __entry->Op1 = Op1;
+ __entry->Rt1 = Rt1;
+ __entry->CRn = CRn;
+ __entry->CRm = CRm;
+ __entry->Op2 = Op2;
+ ),
+
+ TP_printk("Implementation defined CP15: %s\tp15, %u, r%u, c%u, c%u, %u",
+ (__entry->is_write) ? "mcr" : "mrc",
+ __entry->Op1, __entry->Rt1, __entry->CRn,
+ __entry->CRm, __entry->Op2)
+);
+
+TRACE_EVENT(kvm_wfi,
+ TP_PROTO(unsigned long vcpu_pc),
+ TP_ARGS(vcpu_pc),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, vcpu_pc )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_pc = vcpu_pc;
+ ),
+
+ TP_printk("guest executed wfi at: 0x%08lx", __entry->vcpu_pc)
+);
+
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH arch/arm/kvm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 101b9681c08c2..037dc53e00905 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -597,6 +597,16 @@ config ARM_LPAE
If unsure, say N.
+config ARM_VIRT_EXT
+ bool "Support for ARM Virtualization Extensions"
+ depends on ARM_LPAE
+ help
+ Say Y if you have an ARMv7 processor supporting the ARM hardware
+ Virtualization extensions. KVM depends on this feature and will
+ not run without it being selected. If you say Y here, the kernel
+ will not boot on a machine without virtualization extensions and
+ will not boot as a KVM guest.
+
config ARCH_PHYS_ADDR_T_64BIT
def_bool ARM_LPAE
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index ab88ed4f8e08f..7a944af10dbda 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -1,4 +1,6 @@
+#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <asm/cputype.h>
#include <asm/idmap.h>
@@ -59,11 +61,20 @@ static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
} while (pud++, addr = next, addr != end);
}
-static void identity_mapping_add(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void identity_mapping_add(pgd_t *pgd, const char *text_start,
+ const char *text_end, unsigned long prot)
{
- unsigned long prot, next;
+ unsigned long addr, end;
+ unsigned long next;
+
+ addr = virt_to_phys(text_start);
+ end = virt_to_phys(text_end);
+
+ pr_info("Setting up static %sidentity map for 0x%llx - 0x%llx\n",
+ prot ? "HYP " : "",
+ (long long)addr, (long long)end);
+ prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
- prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
prot |= PMD_BIT4;
@@ -78,24 +89,77 @@ extern char __idmap_text_start[], __idmap_text_end[];
static int __init init_static_idmap(void)
{
- phys_addr_t idmap_start, idmap_end;
-
idmap_pgd = pgd_alloc(&init_mm);
if (!idmap_pgd)
return -ENOMEM;
- /* Add an identity mapping for the physical address of the section. */
- idmap_start = virt_to_phys((void *)__idmap_text_start);
- idmap_end = virt_to_phys((void *)__idmap_text_end);
-
- pr_info("Setting up static identity map for 0x%llx - 0x%llx\n",
- (long long)idmap_start, (long long)idmap_end);
- identity_mapping_add(idmap_pgd, idmap_start, idmap_end);
+ identity_mapping_add(idmap_pgd, __idmap_text_start,
+ __idmap_text_end, 0);
return 0;
}
early_initcall(init_static_idmap);
+#ifdef CONFIG_ARM_VIRT_EXT
+pgd_t *hyp_pgd;
+EXPORT_SYMBOL_GPL(hyp_pgd);
+
+static void hyp_idmap_del_pmd(pgd_t *pgd, unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = pud_offset(pgd, addr);
+ pmd = pmd_offset(pud, addr);
+ pud_clear(pud);
+ clean_pmd_entry(pmd);
+ pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
+}
+
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+
+/*
+ * This version actually frees the underlying pmds for all pgds in range and
+ * clears the pgds themselves afterwards.
+ */
+void hyp_idmap_teardown(void)
+{
+ unsigned long addr, end;
+ unsigned long next;
+ pgd_t *pgd = hyp_pgd;
+
+ addr = virt_to_phys(__hyp_idmap_text_start);
+ end = virt_to_phys(__hyp_idmap_text_end);
+
+ pgd += pgd_index(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (!pgd_none_or_clear_bad(pgd))
+ hyp_idmap_del_pmd(pgd, addr);
+ } while (pgd++, addr = next, addr < end);
+}
+EXPORT_SYMBOL_GPL(hyp_idmap_teardown);
+
+void hyp_idmap_setup(void)
+{
+ identity_mapping_add(hyp_pgd, __hyp_idmap_text_start,
+ __hyp_idmap_text_end, PMD_SECT_AP1);
+}
+EXPORT_SYMBOL_GPL(hyp_idmap_setup);
+
+static int __init hyp_init_static_idmap(void)
+{
+ hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+ if (!hyp_pgd)
+ return -ENOMEM;
+
+ hyp_idmap_setup();
+
+ return 0;
+}
+early_initcall(hyp_init_static_idmap);
+#endif
+
/*
* In order to soft-boot, we need to switch to a 1:1 mapping for the
* cpu_reset functions. This will then ensure that we have predictable
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60b558b4..7dd4b5463f32d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -56,9 +56,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
struct cachepolicy {
const char policy[16];
@@ -301,6 +303,12 @@ const struct mem_type *get_mem_type(unsigned int type)
}
EXPORT_SYMBOL(get_mem_type);
+pteval_t get_mem_type_prot_pte(unsigned int type)
+{
+ return get_mem_type(type)->prot_pte;
+}
+EXPORT_SYMBOL(get_mem_type_prot_pte);
+
/*
* Adjust the PMD section entries according to the CPU in use.
*/
@@ -514,6 +522,7 @@ static void __init build_mem_type_table(void)
pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | kern_pgprot);
+ pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
diff --git a/arch/ia64/include/asm/kvm.h b/arch/ia64/include/asm/kvm.h
index b9f82c84f0932..ec6c6b3012389 100644
--- a/arch/ia64/include/asm/kvm.h
+++ b/arch/ia64/include/asm/kvm.h
@@ -26,6 +26,7 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
/* Architectural interrupt line count. */
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 9806e55f91bee..df5351e3eed73 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -19,6 +19,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on BROKEN
depends on HAVE_KVM && MODULES && EXPERIMENTAL
# for device assignment:
depends on PCI
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bd77cb507c1c7..122a4b2a714af 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return 0;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
}
break;
- case KVM_IRQ_LINE_STATUS:
- case KVM_IRQ_LINE: {
- struct kvm_irq_level irq_event;
-
- r = -EFAULT;
- if (copy_from_user(&irq_event, argp, sizeof irq_event))
- goto out;
- r = -ENXIO;
- if (irqchip_in_kernel(kvm)) {
- __s32 status;
- status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
- if (ioctl == KVM_IRQ_LINE_STATUS) {
- r = -EFAULT;
- irq_event.status = status;
- if (copy_to_user(argp, &irq_event,
- sizeof irq_event))
- goto out;
- }
- r = 0;
- }
- break;
- }
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip chip;
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 976835d8f22e2..bf2c06c338719 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -153,6 +153,8 @@
#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
/*
* We use "uintptr_t" to define a register because it's guaranteed to be a
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index b0c08b142770d..0dd1d86d3e316 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_64_HV
-/* For now use fixed-size 16MB page table */
-#define HPT_ORDER 24
-#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
-#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
-#define HPT_HASH_MASK (HPT_NPTEG - 1)
+#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
+extern int kvm_hpt_order; /* order of preallocated HPTs */
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d848cdc497152..50ea12fd7bf5e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -237,6 +237,10 @@ struct kvm_arch {
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
+ u32 hpt_order;
+ atomic_t vcpus_running;
+ unsigned long hpt_npte;
+ unsigned long hpt_mask;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
@@ -414,7 +418,9 @@ struct kvm_vcpu_arch {
ulong mcsrr1;
ulong mcsr;
u32 dec;
+#ifdef CONFIG_BOOKE
u32 decar;
+#endif
u32 tbl;
u32 tbu;
u32 tcr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f68c22fa2fceb..0124937a23b97 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -119,7 +119,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
-extern long kvmppc_alloc_hpt(struct kvm *kvm);
+extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
+extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 83afacd3ba7b3..bb282dd816121 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
obj-y += ppc_save_regs.o
endif
+obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
# Disable GCOV in odd or sensitive code
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
new file mode 100644
index 0000000000000..697b390ebfd8a
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/* Hypercall entry point. Will be patched with device tree instructions. */
+.global epapr_hypercall_start
+epapr_hypercall_start:
+ li r3, -1
+ nop
+ nop
+ nop
+ blr
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
new file mode 100644
index 0000000000000..028aeae370b65
--- /dev/null
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -0,0 +1,52 @@
+/*
+ * ePAPR para-virtualization support.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ */
+
+#include <linux/of.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/cacheflush.h>
+#include <asm/code-patching.h>
+
+bool epapr_paravirt_enabled;
+
+static int __init epapr_paravirt_init(void)
+{
+ struct device_node *hyper_node;
+ const u32 *insts;
+ int len, i;
+
+ hyper_node = of_find_node_by_path("/hypervisor");
+ if (!hyper_node)
+ return -ENODEV;
+
+ insts = of_get_property(hyper_node, "hcall-instructions", &len);
+ if (!insts)
+ return -ENODEV;
+
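+	/* Expect whole 32-bit instructions, at most four of them */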
+ if (len % 4 || len > (4 * 4))
+ return -ENODEV;
+
+ for (i = 0; i < (len / 4); i++)
+ patch_instruction(epapr_hypercall_start + i, insts[i]);
+
+ epapr_paravirt_enabled = true;
+
+ return 0;
+}
+
+early_initcall(epapr_paravirt_init);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 62bdf23896698..1c13307e03088 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -31,6 +31,7 @@
#include <asm/cacheflush.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
+#include <asm/epapr_hcalls.h>
#define KVM_MAGIC_PAGE (-4096L)
#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in,
unsigned long register r11 asm("r11") = nr;
unsigned long register r12 asm("r12");
- asm volatile("bl kvm_hypercall_start"
+ asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
"=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
"=r"(r12)
@@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in,
}
EXPORT_SYMBOL_GPL(kvm_hypercall);
-static int kvm_para_setup(void)
-{
- extern u32 kvm_hypercall_start;
- struct device_node *hyper_node;
- u32 *insts;
- int len, i;
-
- hyper_node = of_find_node_by_path("/hypervisor");
- if (!hyper_node)
- return -1;
-
- insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
- if (len % 4)
- return -1;
- if (len > (4 * 4))
- return -1;
-
- for (i = 0; i < (len / 4); i++)
- kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
-
- return 0;
-}
-
static __init void kvm_free_tmp(void)
{
unsigned long start, end;
@@ -791,7 +769,7 @@ static int __init kvm_guest_init(void)
if (!kvm_para_available())
goto free_tmp;
- if (kvm_para_setup())
+ if (!epapr_paravirt_enabled)
goto free_tmp;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index e291cf3cf954b..e100ff324a85c 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -24,16 +24,6 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
-/* Hypercall entry point. Will be patched with device tree instructions. */
-
-.global kvm_hypercall_start
-kvm_hypercall_start:
- li r3, -1
- nop
- nop
- nop
- blr
-
#define KVM_MAGIC_PAGE (-4096)
#ifdef CONFIG_64BIT
@@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len:
.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
-#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
+#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
.global kvm_emulate_mtmsr
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 80a5775175844..d03eb6f7b0584 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,56 +37,121 @@
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
-long kvmppc_alloc_hpt(struct kvm *kvm)
+/* The Power architecture requires the HPT to be at least 256kB */
+#define PPC_MIN_HPT_ORDER 18
+
+long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
- long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
+ long order = kvm_hpt_order;
- /* Allocate guest's hashed page table */
- li = kvm_alloc_hpt();
- if (li) {
- /* using preallocated memory */
- hpt = (ulong)li->base_virt;
- kvm->arch.hpt_li = li;
- } else {
- /* using dynamic memory */
+ if (htab_orderp) {
+ order = *htab_orderp;
+ if (order < PPC_MIN_HPT_ORDER)
+ order = PPC_MIN_HPT_ORDER;
+ }
+
+	 * If the user wants a different size from the default,
+	 * first try to allocate it from the kernel page allocator.
+ * try first to allocate it from the kernel page allocator.
+ */
+ hpt = 0;
+ if (order != kvm_hpt_order) {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
- __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
+ __GFP_NOWARN, order - PAGE_SHIFT);
+ if (!hpt)
+ --order;
}
+ /* Next try to allocate from the preallocated pool */
if (!hpt) {
- pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
- return -ENOMEM;
+ li = kvm_alloc_hpt();
+ if (li) {
+ hpt = (ulong)li->base_virt;
+ kvm->arch.hpt_li = li;
+ order = kvm_hpt_order;
+ }
}
+
+ /* Lastly try successively smaller sizes from the page allocator */
+ while (!hpt && order > PPC_MIN_HPT_ORDER) {
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
+ __GFP_NOWARN, order - PAGE_SHIFT);
+ if (!hpt)
+ --order;
+ }
+
+ if (!hpt)
+ return -ENOMEM;
+
kvm->arch.hpt_virt = hpt;
+ kvm->arch.hpt_order = order;
+ /* HPTEs are 2**4 bytes long */
+ kvm->arch.hpt_npte = 1ul << (order - 4);
+ /* 128 (2**7) bytes in each HPTEG */
+ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
+ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
}
kvm->arch.revmap = rev;
+ kvm->arch.sdr1 = __pa(hpt) | (order - 18);
- lpid = kvmppc_alloc_lpid();
- if (lpid < 0)
- goto out_freeboth;
+ pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
+ hpt, order, kvm->arch.lpid);
- kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
- kvm->arch.lpid = lpid;
-
- pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
+ if (htab_orderp)
+ *htab_orderp = order;
return 0;
- out_freeboth:
- vfree(rev);
out_freehpt:
- free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
+ if (kvm->arch.hpt_li)
+ kvm_release_hpt(kvm->arch.hpt_li);
+ else
+ free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
}
+long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+{
+ long err = -EBUSY;
+ long order;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->arch.rma_setup_done) {
+ kvm->arch.rma_setup_done = 0;
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ goto out;
+ }
+ }
+ if (kvm->arch.hpt_virt) {
+ order = kvm->arch.hpt_order;
+ /* Set the entire HPT to 0, i.e. invalid HPTEs */
+ memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+ /*
+ * Set the whole last_vcpu array to an invalid vcpu number.
+ * This ensures that each vcpu will flush its TLB on next entry.
+ */
+ memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+ *htab_orderp = order;
+ err = 0;
+ } else {
+ err = kvmppc_alloc_hpt(kvm, htab_orderp);
+ order = *htab_orderp;
+ }
+ out:
+ mutex_unlock(&kvm->lock);
+ return err;
+}
+
void kvmppc_free_hpt(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
@@ -94,7 +159,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
else
- free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
+ free_pages(kvm->arch.hpt_virt,
+ kvm->arch.hpt_order - PAGE_SHIFT);
}
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@@ -119,6 +185,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long psize;
unsigned long hp0, hp1;
long ret;
+ struct kvm *kvm = vcpu->kvm;
psize = 1ul << porder;
npages = memslot->npages >> (porder - PAGE_SHIFT);
@@ -127,8 +194,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
- if (npages > HPT_NPTEG)
- npages = HPT_NPTEG;
+ if (npages > kvm->arch.hpt_mask + 1)
+ npages = kvm->arch.hpt_mask + 1;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -138,7 +205,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
- hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
/*
* We assume that the hash table is empty and no
* vcpus are using it at this stage. Since we create
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3abe1b86e5834..83e929e66f9d8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -56,7 +56,7 @@
/* #define EXIT_DEBUG_INT */
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
@@ -1104,11 +1104,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
return -EINTR;
}
- /* On the first time here, set up VRMA or RMA */
+ atomic_inc(&vcpu->kvm->arch.vcpus_running);
+ /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+ smp_mb();
+
+ /* On the first time here, set up HTAB and VRMA or RMA */
if (!vcpu->kvm->arch.rma_setup_done) {
- r = kvmppc_hv_setup_rma(vcpu);
+ r = kvmppc_hv_setup_htab_rma(vcpu);
if (r)
- return r;
+ goto out;
}
flush_fp_to_thread(current);
@@ -1126,6 +1130,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_core_prepare_to_enter(vcpu);
}
} while (r == RESUME_GUEST);
+
+ out:
+ atomic_dec(&vcpu->kvm->arch.vcpus_running);
return r;
}
@@ -1341,7 +1348,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
{
}
-static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
+static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
@@ -1360,6 +1367,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
if (kvm->arch.rma_setup_done)
goto out; /* another vcpu beat us to it */
+ /* Allocate hashed page table (if not done already) and reset it */
+ if (!kvm->arch.hpt_virt) {
+ err = kvmppc_alloc_hpt(kvm, NULL);
+ if (err) {
+ pr_err("KVM: Couldn't alloc HPT\n");
+ goto out;
+ }
+ }
+
/* Look up the memslot for guest physical address 0 */
memslot = gfn_to_memslot(kvm, 0);
@@ -1471,13 +1487,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
int kvmppc_core_init_vm(struct kvm *kvm)
{
- long r;
- unsigned long lpcr;
+ unsigned long lpcr, lpid;
- /* Allocate hashed page table */
- r = kvmppc_alloc_hpt(kvm);
- if (r)
- return r;
+ /* Allocate the guest's logical partition ID */
+
+ lpid = kvmppc_alloc_lpid();
+ if (lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
@@ -1487,7 +1504,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970; HID4 is effectively the LPCR */
- unsigned long lpid = kvm->arch.lpid;
kvm->arch.host_lpid = 0;
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index e1b60f56f2a1e..fb4eac290fefc 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -25,6 +25,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type);
static struct kvmppc_linear_info *kvm_alloc_linear(int type);
static void kvm_release_linear(struct kvmppc_linear_info *ri);
+int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
+EXPORT_SYMBOL_GPL(kvm_hpt_order);
+
/*************** RMA *************/
/*
@@ -209,7 +212,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
void __init kvm_linear_init(void)
{
/* HPT */
- kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
+ kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
/* RMA */
/* Only do this on PPC970 in HV mode */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index cec4daddbf316..5c70d19494f92 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Find and lock the HPTEG slot to use */
do_insert:
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
@@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long v, r, rb;
struct revmap_entry *rev;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
@@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
i = 4;
break;
}
- if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+ if (req != 1 || flags == 3 ||
+ pte_index >= kvm->arch.hpt_npte) {
/* parameter error */
args[j] = ((0xa0 | flags) << 56) + pte_index;
ret = H_PARAMETER;
@@ -521,7 +522,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
@@ -583,7 +584,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
int i, n = 1;
struct revmap_entry *rev = NULL;
- if (pte_index >= HPT_NPTE)
+ if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
@@ -678,7 +679,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
somask = (1UL << 28) - 1;
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
}
- hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
avpn = slb_v & ~(somask >> 16); /* also includes B */
avpn |= (eaddr & somask) >> 16;
@@ -723,7 +724,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
if (val & HPTE_V_SECONDARY)
break;
val |= HPTE_V_SECONDARY;
- hash = hash ^ HPT_HASH_MASK;
+ hash = hash ^ kvm->arch.hpt_mask;
}
return -1;
}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 72f13f4a06e0d..86681eec60b1e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1267,6 +1267,11 @@ void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ if (vcpu->arch.tcr & TCR_ARE) {
+ vcpu->arch.dec = vcpu->arch.decar;
+ kvmppc_emulate_dec(vcpu);
+ }
+
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
}
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 6c76397f2af48..9eb9809eb13e2 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -129,6 +129,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_set_tcr(vcpu, spr_val);
break;
+ case SPRN_DECAR:
+ vcpu->arch.decar = spr_val;
+ break;
/*
* Note: SPRG4-7 are user-readable.
* These values are loaded into the real SPRGs when resuming the
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index 8b99e076dc818..e04b0ef55ce0e 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -269,6 +269,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = vcpu->arch.shared->mas7_3 >> 32;
break;
#endif
+ case SPRN_DECAR:
+ *spr_val = vcpu->arch.decar;
+ break;
case SPRN_TLB0CFG:
*spr_val = vcpu->arch.tlbcfg[0];
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1493c8de947b9..87f4dc886076d 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext)
#endif
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
+ case KVM_CAP_PPC_ALLOC_HTAB:
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@@ -802,6 +803,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+
+ case KVM_PPC_ALLOCATE_HTAB: {
+ struct kvm *kvm = filp->private_data;
+ u32 htab_order;
+
+ r = -EFAULT;
+ if (get_user(htab_order, (u32 __user *)argp))
+ break;
+ r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ if (r)
+ break;
+ r = -EFAULT;
+ if (put_user(htab_order, (u32 __user *)argp))
+ break;
+ r = 0;
+ break;
+ }
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index a35ca44ade66a..e7a896acd9827 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -25,6 +25,7 @@ source "arch/powerpc/platforms/wsp/Kconfig"
config KVM_GUEST
bool "KVM Guest support"
default n
+ select EPAPR_PARAVIRT
---help---
This option enables various optimizations for running under the KVM
hypervisor. Overhead for the kernel when not running inside KVM should
@@ -32,6 +33,14 @@ config KVM_GUEST
In case of doubt, say Y
+config EPAPR_PARAVIRT
+ bool "ePAPR para-virtualization support"
+ default n
+ help
+ Enables ePAPR para-virtualization support for guests.
+
+ In case of doubt, say Y
+
config PPC_NATIVE
bool
depends on 6xx || PPC64
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index bf238c55740bc..173f07aaeb2b3 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -55,5 +55,7 @@ int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
+bool sclp_has_linemode(void);
+bool sclp_has_vt220(void);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 489d1d8d96b06..e86bca6f975c5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -63,6 +63,7 @@
#include <asm/kvm_virtio.h>
#include <asm/diag.h>
#include <asm/os_info.h>
+#include <asm/sclp.h>
#include "entry.h"
long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
@@ -138,9 +139,14 @@ __setup("condev=", condev_setup);
static void __init set_preferred_console(void)
{
- if (MACHINE_IS_KVM)
- add_preferred_console("hvc", 0, NULL);
- else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
+ if (MACHINE_IS_KVM) {
+ if (sclp_has_vt220())
+ add_preferred_console("ttyS", 1, NULL);
+ else if (sclp_has_linemode())
+ add_preferred_console("ttyS", 0, NULL);
+ else
+ add_preferred_console("hvc", 0, NULL);
+ } else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
add_preferred_console("ttyS", 0, NULL);
else if (CONSOLE_IS_3270)
add_preferred_console("tty3270", 0, NULL);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 664766d0c83c6..ace93603d8610 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -347,6 +347,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
+ atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index a6983b2772201..72f5009deb5a0 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -264,6 +264,13 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
+ *
+ * Note: the operation is performed atomically with respect to
+ * the local CPU, but not other CPUs. Portable code should not
+ * rely on this behaviour.
+ * KVM relies on this behaviour on x86 for modifying memory that is also
+ * accessed from a hypervisor on the same CPU if running in a VM: don't change
+ * this without also updating arch/x86/kernel/kvm.c
*/
static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
{
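
To illustrate the guarantee the comment above relies on, here is a hedged sketch (the names are invented, not part of this patch): a flag word that only the local CPU — or a hypervisor emulating that CPU — ever touches can use __test_and_clear_bit() without locks, because the read-modify-write cannot be split by an interrupt on the same CPU; two different CPUs racing on the same word could still lose updates.

    /* Sketch: a per-CPU event flag, produced and consumed on one CPU. */
    static DEFINE_PER_CPU(unsigned long, event_pending);

    static int consume_local_event(void)
    {
            /* Lock-free is safe only because no other CPU touches this word. */
            return __test_and_clear_bit(0, &__get_cpu_var(event_pending));
    }
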
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index e7d1c194d2728..246617efd67f5 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -12,6 +12,7 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db7c1f2709a27..24b76474d9de0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -175,6 +175,13 @@ enum {
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
+/*
+ * The following bit is set with PV-EOI, unset on EOI.
+ * We detect PV-EOI changes by guest by comparing
+ * this bit with PV-EOI in guest memory.
+ * See the implementation in apic_update_pv_eoi.
+ */
+#define KVM_APIC_PV_EOI_PENDING 1
/*
* We don't want allocation failures within the mmu code, so we preallocate
@@ -484,6 +491,11 @@ struct kvm_vcpu_arch {
u64 length;
u64 status;
} osvw;
+
+ struct {
+ u64 msr_val;
+ struct gfn_to_hva_cache data;
+ } pv_eoi;
};
struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 63ab1661d00eb..2f7712e08b1e8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -22,6 +22,7 @@
#define KVM_FEATURE_CLOCKSOURCE2 3
#define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,7 @@
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
+#define MSR_KVM_PV_EOI_EN 0x4b564d04
struct kvm_steal_time {
__u64 steal;
@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data {
__u32 enabled;
};
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
#ifdef __KERNEL__
#include <asm/processor.h>
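
For orientation, the enable handshake these definitions support: the guest writes the physical address of a 4-byte-aligned flag word, with KVM_MSR_ENABLED (bit 0) set, to MSR_KVM_PV_EOI_EN. The sketch below condenses the arch/x86/kernel/kvm.c hunk later in this patch; the function name is invented:

    static DEFINE_PER_CPU(unsigned long, pv_eoi_flag) = KVM_PV_EOI_DISABLED;

    static void enable_pv_eoi(void)
    {
            /* The flag word must be at least 4-byte aligned; the host
             * side (kvm_lapic_enable_pv_eoi) rejects unaligned addresses. */
            unsigned long pa = __pa(&__get_cpu_var(pv_eoi_flag));

            if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                    wrmsrl(MSR_KVM_PV_EOI_EN, pa | KVM_MSR_ENABLED);
    }
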
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 31f180c21ce91..de007c2727307 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -404,6 +404,7 @@ enum vmcs_field {
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
+#define VMX_EPT_AD_BIT (1ull << 21)
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@@ -415,11 +416,14 @@ enum vmcs_field {
#define VMX_EPT_MAX_GAW 0x4
#define VMX_EPT_MT_EPTE_SHIFT 3
#define VMX_EPT_GAW_EPTP_SHIFT 3
+#define VMX_EPT_AD_ENABLE_BIT (1ull << 6)
#define VMX_EPT_DEFAULT_MT 0x6ull
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IPAT_BIT (1ull << 6)
+#define VMX_EPT_ACCESS_BIT (1ull << 8)
+#define VMX_EPT_DIRTY_BIT (1ull << 9)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e554e5ad2fe8b..75ab94c75c7a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,8 @@
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/idle.h>
+#include <asm/apic.h>
+#include <asm/apicdef.h>
static int kvmapf = 1;
@@ -283,6 +285,22 @@ static void kvm_register_steal_time(void)
cpu, __pa(st));
}
+static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+
+static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+{
+	/*
+	 * This relies on __test_and_clear_bit to modify the memory
+	 * in a way that is atomic with respect to the local CPU.
+	 * The hypervisor only accesses this memory from the local CPU, so
+	 * there is no need for locks or memory barriers.
+	 * An optimization barrier is implied by the apic write.
+	 */
+ if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
+ return;
+ apic->write(APIC_EOI, APIC_EOI_ACK);
+}
+
void __cpuinit kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
@@ -300,11 +318,20 @@ void __cpuinit kvm_guest_cpu_init(void)
smp_processor_id());
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+ unsigned long pa;
+		/* Alignment >= 4 is implied by the type, but make it explicit. */
+ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
+ __get_cpu_var(kvm_apic_eoi) = 0;
+ pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
+ wrmsrl(MSR_KVM_PV_EOI_EN, pa);
+ }
+
if (has_steal_clock)
kvm_register_steal_time();
}
-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
{
if (!__get_cpu_var(apf_reason).enabled)
return;
@@ -316,11 +343,23 @@ static void kvm_pv_disable_apf(void *unused)
smp_processor_id());
}
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+ /*
+	 * We disable PV EOI before we load a new kernel by kexec, since
+	 * MSR_KVM_PV_EOI_EN stores a pointer into the old kernel's memory.
+	 * The new kernel can re-enable it when it boots.
+ */
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
+}
+
static int kvm_pv_reboot_notify(struct notifier_block *nb,
unsigned long code, void *unused)
{
if (code == SYS_RESTART)
- on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+ on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
return NOTIFY_DONE;
}
@@ -371,7 +410,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
static void kvm_guest_cpu_offline(void *dummy)
{
kvm_disable_steal_time();
- kvm_pv_disable_apf(NULL);
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+ wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ kvm_pv_disable_apf();
apf_task_wake_all();
}
@@ -424,6 +465,16 @@ void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
+ if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
+ struct apic **drv;
+
+ for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+ /* Should happen once for each apic */
+ WARN_ON((*drv)->eoi_write == kvm_guest_apic_eoi_write);
+ (*drv)->eoi_write = kvm_guest_apic_eoi_write;
+ }
+ }
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7df1c6d839fb4..61ccbdf3d0ac2 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -409,6 +409,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) |
+ (1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
if (sched_info_on())
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93c15743f1ee1..ce878788a39fd 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -107,6 +107,16 @@ static inline void apic_clear_vector(int vec, void *bitmap)
clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
+static inline int __apic_test_and_set_vector(int vec, void *bitmap)
+{
+ return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
+{
+ return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline int apic_hw_enabled(struct kvm_lapic *apic)
{
return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
@@ -210,6 +220,16 @@ static int find_highest_vector(void *bitmap)
return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
}
+static u8 count_vectors(void *bitmap)
+{
+ u32 *word = bitmap;
+ int word_offset;
+ u8 count = 0;
+ for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
+ count += hweight32(word[word_offset << 2]);
+ return count;
+}
+
static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
apic->irr_pending = true;
@@ -242,6 +262,27 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
apic->irr_pending = true;
}
+static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
+{
+ if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
+ ++apic->isr_count;
+ BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
+ /*
+ * ISR (in service register) bit is set when injecting an interrupt.
+ * The highest vector is injected. Thus the latest bit set matches
+ * the highest bit in ISR.
+ */
+ apic->highest_isr_cache = vec;
+}
+
+static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
+{
+ if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
+ --apic->isr_count;
+ BUG_ON(apic->isr_count < 0);
+ apic->highest_isr_cache = -1;
+}
+
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
@@ -270,9 +311,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
irq->level, irq->trig_mode);
}
+static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
+{
+
+ return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
+ sizeof(val));
+}
+
+static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
+{
+
+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
+ sizeof(*val));
+}
+
+static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+
+static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
+{
+ u8 val;
+ if (pv_eoi_get_user(vcpu, &val) < 0)
+ apic_debug("Can't read EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return val & 0x1;
+}
+
+static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
+{
+ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
+ apic_debug("Can't set EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return;
+ }
+ __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+}
+
+static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
+{
+ if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
+ apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
+ return;
+ }
+ __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
+}
+
static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
int result;
+ if (!apic->isr_count)
+ return -1;
+ if (likely(apic->highest_isr_cache != -1))
+ return apic->highest_isr_cache;
result = find_highest_vector(apic->regs + APIC_ISR);
ASSERT(result == -1 || result >= 16);
@@ -482,17 +575,20 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}
-static void apic_set_eoi(struct kvm_lapic *apic)
+static int apic_set_eoi(struct kvm_lapic *apic)
{
int vector = apic_find_highest_isr(apic);
+
+ trace_kvm_eoi(apic, vector);
+
/*
* Not every EOI write will have a corresponding bit set in ISR;
* one example is when the kernel checks the timer on setup_IO_APIC.
*/
if (vector == -1)
- return;
+ return vector;
- apic_clear_vector(vector, apic->regs + APIC_ISR);
+ apic_clear_isr(vector, apic);
apic_update_ppr(apic);
if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
@@ -505,6 +601,7 @@ static void apic_set_eoi(struct kvm_lapic *apic)
kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
}
kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
+ return vector;
}
static void apic_send_ipi(struct kvm_lapic *apic)
@@ -1081,10 +1178,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
apic->irr_pending = false;
+ apic->isr_count = 0;
+ apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
if (kvm_vcpu_is_bsp(vcpu))
vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ vcpu->arch.pv_eoi.msr_val = 0;
apic_update_ppr(apic);
vcpu->arch.apic_arb_prio = 0;
@@ -1248,7 +1348,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
if (vector == -1)
return -1;
- apic_set_vector(vector, apic->regs + APIC_ISR);
+ apic_set_isr(vector, apic);
apic_update_ppr(apic);
apic_clear_irr(vector, apic);
return vector;
@@ -1267,6 +1367,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
+ apic->isr_count = count_vectors(apic->regs + APIC_ISR);
+ apic->highest_isr_cache = -1;
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
@@ -1283,11 +1385,51 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
+/*
+ * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
+ *
+ * Detect whether the guest triggered PV EOI since the
+ * last entry. If yes, set EOI on the guest's behalf.
+ * Clear PV EOI in guest memory in any case.
+ */
+static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
+ struct kvm_lapic *apic)
+{
+ bool pending;
+ int vector;
+ /*
+ * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
+ * and KVM_PV_EOI_ENABLED in guest memory as follows:
+ *
+ * KVM_APIC_PV_EOI_PENDING is unset:
+ * -> host disabled PV EOI.
+ * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
+ * -> host enabled PV EOI, guest did not execute EOI yet.
+ * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
+ * -> host enabled PV EOI, guest executed EOI.
+ */
+ BUG_ON(!pv_eoi_enabled(vcpu));
+ pending = pv_eoi_get_pending(vcpu);
+ /*
+ * Clear pending bit in any case: it will be set again on vmentry.
+	 * While this might not be ideal from a performance point of view,
+	 * it makes sure PV EOI is only enabled when we know it's safe.
+ */
+ pv_eoi_clr_pending(vcpu);
+ if (pending)
+ return;
+ vector = apic_set_eoi(apic);
+ trace_kvm_pv_eoi(apic, vector);
+}
+
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
{
u32 data;
void *vapic;
+ if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
+ apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
+
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
@@ -1298,17 +1440,44 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
+/*
+ * apic_sync_pv_eoi_to_guest - called before vmentry
+ *
+ * Detect whether it's safe to enable PV EOI and
+ * if yes do so.
+ */
+static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
+ struct kvm_lapic *apic)
+{
+ if (!pv_eoi_enabled(vcpu) ||
+ /* IRR set or many bits in ISR: could be nested. */
+ apic->irr_pending ||
+ /* Cache not set: could be safe but we don't bother. */
+ apic->highest_isr_cache == -1 ||
+ /* Need EOI to update ioapic. */
+ kvm_ioapic_handles_vector(vcpu->kvm, apic->highest_isr_cache)) {
+ /*
+ * PV EOI was disabled by apic_sync_pv_eoi_from_guest
+ * so we need not do anything here.
+ */
+ return;
+ }
+
+ pv_eoi_set_pending(apic->vcpu);
+}
+
void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
{
u32 data, tpr;
int max_irr, max_isr;
- struct kvm_lapic *apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
void *vapic;
+ apic_sync_pv_eoi_to_guest(vcpu, apic);
+
if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
return;
- apic = vcpu->arch.apic;
tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
max_irr = apic_find_highest_irr(apic);
if (max_irr < 0)
@@ -1394,3 +1563,16 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
+
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+{
+ u64 addr = data & ~KVM_MSR_ENABLED;
+ if (!IS_ALIGNED(addr, 4))
+ return 1;
+
+ vcpu->arch.pv_eoi.msr_val = data;
+ if (!pv_eoi_enabled(vcpu))
+ return 0;
+ return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+ addr);
+}
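
Taken together, the lapic.c changes pair one hook before vmentry with one after vmexit; a condensed, non-compilable view of the flow, using only names from this patch:

    /*
     * vmentry:  kvm_lapic_sync_to_vapic()
     *   -> apic_sync_pv_eoi_to_guest(): when safe (single ISR bit cached,
     *      no IRR pending, no ioapic involvement), set KVM_PV_EOI_ENABLED
     *      in guest memory and KVM_APIC_PV_EOI_PENDING in apic_attention.
     *
     * vmexit:   kvm_lapic_sync_from_vapic()
     *   -> apic_sync_pv_eoi_from_guest(): read and clear the guest flag;
     *      if the guest cleared it, perform apic_set_eoi() on its behalf.
     */
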
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6f4ce2575d095..4af5405ae1e2f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -13,6 +13,15 @@ struct kvm_lapic {
u32 divide_count;
struct kvm_vcpu *vcpu;
bool irr_pending;
+ /* Number of bits set in ISR. */
+ s16 isr_count;
+	/* The highest vector set in ISR; -1 means invalid, ISR must be scanned. */
+ int highest_isr_cache;
+	/*
+ * APIC register page. The layout matches the register layout seen by
+ * the guest 1:1, because it is accessed by the vmx microcode.
+ * Note: Only one register, the TPR, is used by the microcode.
+ */
void *regs;
gpa_t vapic_addr;
struct page *vapic_page;
@@ -60,4 +69,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
}
+
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index be3cea4407ffa..3b53d9e08bfc3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -90,7 +90,7 @@ module_param(dbg, bool, 0644);
#define PTE_PREFETCH_NUM 8
-#define PT_FIRST_AVAIL_BITS_SHIFT 9
+#define PT_FIRST_AVAIL_BITS_SHIFT 10
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
#define PT64_LEVEL_BITS 9
@@ -652,8 +652,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
mmu_page_header_cache);
}
-static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
- size_t size)
+static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
void *p;
@@ -664,8 +663,7 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu)
{
- return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache,
- sizeof(struct pte_list_desc));
+ return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache);
}
static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
@@ -1238,11 +1236,12 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
unsigned long data)
{
u64 *sptep;
- struct rmap_iterator iter;
+ struct rmap_iterator uninitialized_var(iter);
int young = 0;
/*
- * Emulate the accessed bit for EPT, by checking if this page has
+	 * If EPT Access and Dirty bits are not supported, emulate the
+	 * accessed bit for EPT by checking if this page has
* an EPT mapping, and clearing it if it does. On the next access,
* a new EPT mapping will be established.
* This has some overhead, but not as much as the cost of swapping
@@ -1253,11 +1252,12 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
- BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ BUG_ON(!is_shadow_present_pte(*sptep));
- if (*sptep & PT_ACCESSED_MASK) {
+ if (*sptep & shadow_accessed_mask) {
young = 1;
- clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)sptep);
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)sptep);
}
}
@@ -1281,9 +1281,9 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
for (sptep = rmap_get_first(*rmapp, &iter); sptep;
sptep = rmap_get_next(&iter)) {
- BUG_ON(!(*sptep & PT_PRESENT_MASK));
+ BUG_ON(!is_shadow_present_pte(*sptep));
- if (*sptep & PT_ACCESSED_MASK) {
+ if (*sptep & shadow_accessed_mask) {
young = 1;
break;
}
@@ -1401,12 +1401,10 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
u64 *parent_pte, int direct)
{
struct kvm_mmu_page *sp;
- sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache,
- sizeof *sp);
- sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
+ sp = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache);
+ sp->spt = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
if (!direct)
- sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache,
- PAGE_SIZE);
+ sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM);
@@ -3942,7 +3940,6 @@ static void kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm,
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
struct kvm *kvm;
- struct kvm *kvm_freed = NULL;
int nr_to_scan = sc->nr_to_scan;
if (nr_to_scan == 0)
@@ -3954,22 +3951,30 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
int idx;
LIST_HEAD(invalid_list);
+ /*
+ * n_used_mmu_pages is accessed without holding kvm->mmu_lock
+		 * here. We may skip a VM instance erroneously, but we do not
+ * want to shrink a VM that only started to populate its MMU
+ * anyway.
+ */
+ if (kvm->arch.n_used_mmu_pages > 0) {
+ if (!nr_to_scan--)
+ break;
+ continue;
+ }
+
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
- if (!kvm_freed && nr_to_scan > 0 &&
- kvm->arch.n_used_mmu_pages > 0) {
- kvm_mmu_remove_some_alloc_mmu_pages(kvm,
- &invalid_list);
- kvm_freed = kvm;
- }
- nr_to_scan--;
+ kvm_mmu_remove_some_alloc_mmu_pages(kvm, &invalid_list);
kvm_mmu_commit_zap_page(kvm, &invalid_list);
+
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
+
+ list_move_tail(&kvm->vm_list, &vm_list);
+ break;
}
- if (kvm_freed)
- list_move_tail(&kvm_freed->vm_list, &vm_list);
raw_spin_unlock(&kvm_lock);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f75af406b268e..7a418783259d6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3185,8 +3185,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
break;
case MSR_IA32_DEBUGCTLMSR:
if (!boot_cpu_has(X86_FEATURE_LBRV)) {
- pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
- __func__, data);
+ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
+ __func__, data);
break;
}
if (data & DEBUGCTL_RESERVED_BITS)
@@ -3205,7 +3205,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
case MSR_VM_CR:
return svm_set_vm_cr(vcpu, data);
case MSR_VM_IGNNE:
- pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
+ vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
default:
return kvm_set_msr_common(vcpu, ecx, data);
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 911d2641f14c5..851914e207fc5 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -517,6 +517,40 @@ TRACE_EVENT(kvm_apic_accept_irq,
__entry->coalesced ? " (coalesced)" : "")
);
+TRACE_EVENT(kvm_eoi,
+ TP_PROTO(struct kvm_lapic *apic, int vector),
+ TP_ARGS(apic, vector),
+
+ TP_STRUCT__entry(
+ __field( __u32, apicid )
+ __field( int, vector )
+ ),
+
+ TP_fast_assign(
+ __entry->apicid = apic->vcpu->vcpu_id;
+ __entry->vector = vector;
+ ),
+
+ TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
+);
+
+TRACE_EVENT(kvm_pv_eoi,
+ TP_PROTO(struct kvm_lapic *apic, int vector),
+ TP_ARGS(apic, vector),
+
+ TP_STRUCT__entry(
+ __field( __u32, apicid )
+ __field( int, vector )
+ ),
+
+ TP_fast_assign(
+ __entry->apicid = apic->vcpu->vcpu_id;
+ __entry->vector = vector;
+ ),
+
+ TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector)
+);
+
/*
* Tracepoint for nested VMRUN
*/
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 32eb588662920..eeeb4a25aed67 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -71,6 +71,9 @@ static bool __read_mostly enable_unrestricted_guest = 1;
module_param_named(unrestricted_guest,
enable_unrestricted_guest, bool, S_IRUGO);
+static bool __read_mostly enable_ept_ad_bits = 1;
+module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
+
static bool __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
@@ -615,6 +618,10 @@ static void kvm_cpu_vmxon(u64 addr);
static void kvm_cpu_vmxoff(void);
static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
+static void vmx_set_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg);
+static void vmx_get_segment(struct kvm_vcpu *vcpu,
+ struct kvm_segment *var, int seg);
static DEFINE_PER_CPU(struct vmcs *, vmxarea);
static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
@@ -789,6 +796,11 @@ static inline bool cpu_has_vmx_ept_4levels(void)
return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}
+static inline bool cpu_has_vmx_ept_ad_bits(void)
+{
+ return vmx_capability.ept & VMX_EPT_AD_BIT;
+}
+
static inline bool cpu_has_vmx_invept_individual_addr(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
@@ -2645,8 +2657,12 @@ static __init int hardware_setup(void)
!cpu_has_vmx_ept_4levels()) {
enable_ept = 0;
enable_unrestricted_guest = 0;
+ enable_ept_ad_bits = 0;
}
+ if (!cpu_has_vmx_ept_ad_bits())
+ enable_ept_ad_bits = 0;
+
if (!cpu_has_vmx_unrestricted_guest())
enable_unrestricted_guest = 0;
@@ -2770,6 +2786,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_segment var;
if (enable_unrestricted_guest)
return;
@@ -2813,20 +2830,23 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
if (emulate_invalid_guest_state)
goto continue_rmode;
- vmcs_write16(GUEST_SS_SELECTOR, vmcs_readl(GUEST_SS_BASE) >> 4);
- vmcs_write32(GUEST_SS_LIMIT, 0xffff);
- vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+ vmx_get_segment(vcpu, &var, VCPU_SREG_SS);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_SS);
+
+ vmx_get_segment(vcpu, &var, VCPU_SREG_CS);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_CS);
- vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
- vmcs_write32(GUEST_CS_LIMIT, 0xffff);
- if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
- vmcs_writel(GUEST_CS_BASE, 0xf0000);
- vmcs_write16(GUEST_CS_SELECTOR, vmcs_readl(GUEST_CS_BASE) >> 4);
+ vmx_get_segment(vcpu, &var, VCPU_SREG_ES);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_ES);
- fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
- fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
- fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
- fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+ vmx_get_segment(vcpu, &var, VCPU_SREG_DS);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_DS);
+
+ vmx_get_segment(vcpu, &var, VCPU_SREG_GS);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_GS);
+
+ vmx_get_segment(vcpu, &var, VCPU_SREG_FS);
+ vmx_set_segment(vcpu, &var, VCPU_SREG_FS);
continue_rmode:
kvm_mmu_reset_context(vcpu);
@@ -3027,6 +3047,8 @@ static u64 construct_eptp(unsigned long root_hpa)
/* TODO write the value reading from MSR */
eptp = VMX_EPT_DEFAULT_MT |
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
+ if (enable_ept_ad_bits)
+ eptp |= VMX_EPT_AD_ENABLE_BIT;
eptp |= (root_hpa & PAGE_MASK);
return eptp;
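
For orientation, the EPTP value construct_eptp() assembles, pieced together from the VMX_EPT_* definitions added above (a layout recap, not new code):

    /*
     * bits  2:0   memory type (VMX_EPT_DEFAULT_MT, write-back)
     * bits  5:3   guest address width - 1 (VMX_EPT_DEFAULT_GAW << 3)
     * bit   6     accessed/dirty enable (VMX_EPT_AD_ENABLE_BIT),
     *             set only when enable_ept_ad_bits
     * bits 63:12  physical address of the top-level EPT table (root_hpa)
     */
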
@@ -3229,6 +3251,44 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
vmcs_write32(sf->ar_bytes, ar);
__clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail);
+
+ /*
+	 * Fix segments for a real mode guest on hosts that don't have
+	 * "unrestricted mode", or where it was disabled.
+	 * This is done to allow migration of guests from hosts with
+	 * unrestricted guest support, like Westmere, to older hosts
+	 * without it, like Nehalem.
+ */
+ if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
+ switch (seg) {
+ case VCPU_SREG_CS:
+ vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
+ vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+ if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000)
+ vmcs_writel(GUEST_CS_BASE, 0xf0000);
+ vmcs_write16(GUEST_CS_SELECTOR,
+ vmcs_readl(GUEST_CS_BASE) >> 4);
+ break;
+ case VCPU_SREG_ES:
+ fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+ break;
+ case VCPU_SREG_DS:
+ fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+ break;
+ case VCPU_SREG_GS:
+ fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+ break;
+ case VCPU_SREG_FS:
+ fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+ break;
+ case VCPU_SREG_SS:
+ vmcs_write16(GUEST_SS_SELECTOR,
+ vmcs_readl(GUEST_SS_BASE) >> 4);
+ vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+ vmcs_write32(GUEST_SS_AR_BYTES, 0xf3);
+ break;
+ }
+ }
}
static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -4489,7 +4549,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
break;
}
vcpu->run->exit_reason = 0;
- pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
+ vcpu_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
(int)(exit_qualification >> 4) & 3, cr);
return 0;
}
@@ -7275,8 +7335,10 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
if (enable_ept) {
- kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
- VMX_EPT_EXECUTABLE_MASK);
+ kvm_mmu_set_mask_ptes(0ull,
+ (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+ (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+ 0ull, VMX_EPT_EXECUTABLE_MASK);
ept_set_mmio_spte_mask();
kvm_enable_tdp();
} else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be6d54929fa7d..03ce38611657f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -795,6 +795,7 @@ static u32 msrs_to_save[] = {
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+ MSR_KVM_PV_EOI_EN,
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -1437,8 +1438,8 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
break;
}
default:
- pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
- "data 0x%llx\n", msr, data);
+ vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+ "data 0x%llx\n", msr, data);
return 1;
}
return 0;
@@ -1470,8 +1471,8 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
default:
- pr_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
- "data 0x%llx\n", msr, data);
+ vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
+ "data 0x%llx\n", msr, data);
return 1;
}
@@ -1551,15 +1552,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
data &= ~(u64)0x100; /* ignore ignne emulation enable */
data &= ~(u64)0x8; /* ignore TLB cache disable */
if (data != 0) {
- pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
- data);
+ vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
+ data);
return 1;
}
break;
case MSR_FAM10H_MMIO_CONF_BASE:
if (data != 0) {
- pr_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
- "0x%llx\n", data);
+ vcpu_unimpl(vcpu, "unimplemented MMIO_CONF_BASE wrmsr: "
+ "0x%llx\n", data);
return 1;
}
break;
@@ -1574,8 +1575,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
thus reserved and should throw a #GP */
return 1;
}
- pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
+ vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
+ __func__, data);
break;
case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
@@ -1653,6 +1654,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
break;
+ case MSR_KVM_PV_EOI_EN:
+ if (kvm_lapic_enable_pv_eoi(vcpu, data))
+ return 1;
+ break;
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
@@ -1671,8 +1676,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_K7_EVNTSEL2:
case MSR_K7_EVNTSEL3:
if (data != 0)
- pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
- "0x%x data 0x%llx\n", msr, data);
+ vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+ "0x%x data 0x%llx\n", msr, data);
break;
/* at least RHEL 4 unconditionally writes to the perfctr registers,
* so we ignore writes to make it happy.
@@ -1681,8 +1686,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_K7_PERFCTR1:
case MSR_K7_PERFCTR2:
case MSR_K7_PERFCTR3:
- pr_unimpl(vcpu, "unimplemented perfctr wrmsr: "
- "0x%x data 0x%llx\n", msr, data);
+ vcpu_unimpl(vcpu, "unimplemented perfctr wrmsr: "
+ "0x%x data 0x%llx\n", msr, data);
break;
case MSR_P6_PERFCTR0:
case MSR_P6_PERFCTR1:
@@ -1693,8 +1698,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return kvm_pmu_set_msr(vcpu, msr, data);
if (pr || data != 0)
- pr_unimpl(vcpu, "disabled perfctr wrmsr: "
- "0x%x data 0x%llx\n", msr, data);
+ vcpu_unimpl(vcpu, "disabled perfctr wrmsr: "
+ "0x%x data 0x%llx\n", msr, data);
break;
case MSR_K7_CLK_CTL:
/*
@@ -1720,7 +1725,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
/* Drop writes to this legacy MSR -- see rdmsr
* counterpart for further detail.
*/
- pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
break;
case MSR_AMD64_OSVW_ID_LENGTH:
if (!guest_cpuid_has_osvw(vcpu))
@@ -1738,12 +1743,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr, data);
if (!ignore_msrs) {
- pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
- msr, data);
+ vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+ msr, data);
return 1;
} else {
- pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
- msr, data);
+ vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+ msr, data);
break;
}
}
@@ -1846,7 +1851,7 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = kvm->arch.hv_hypercall;
break;
default:
- pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
@@ -1877,7 +1882,7 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
data = vcpu->arch.hv_vapic;
break;
default:
- pr_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
+ vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
}
*pdata = data;
@@ -2030,10 +2035,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
if (!ignore_msrs) {
- pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
+ vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
return 1;
} else {
- pr_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
+ vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr);
data = 0;
}
break;
@@ -3144,6 +3149,16 @@ out:
return r;
}
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+ if (!irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level);
+ return 0;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -3250,29 +3265,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
create_pit_unlock:
mutex_unlock(&kvm->slots_lock);
break;
- case KVM_IRQ_LINE_STATUS:
- case KVM_IRQ_LINE: {
- struct kvm_irq_level irq_event;
-
- r = -EFAULT;
- if (copy_from_user(&irq_event, argp, sizeof irq_event))
- goto out;
- r = -ENXIO;
- if (irqchip_in_kernel(kvm)) {
- __s32 status;
- status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
- irq_event.irq, irq_event.level);
- if (ioctl == KVM_IRQ_LINE_STATUS) {
- r = -EFAULT;
- irq_event.status = status;
- if (copy_to_user(argp, &irq_event,
- sizeof irq_event))
- goto out;
- }
- r = 0;
- }
- break;
- }
case KVM_GET_IRQCHIP: {
/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
struct kvm_irqchip *chip;
@@ -4116,7 +4108,7 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr)
value = kvm_get_cr8(vcpu);
break;
default:
- vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
+ kvm_err("%s: unexpected cr %u\n", __func__, cr);
return 0;
}
@@ -4145,7 +4137,7 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
res = kvm_set_cr8(vcpu, val);
break;
default:
- vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
+ kvm_err("%s: unexpected cr %u\n", __func__, cr);
res = -1;
}
@@ -5296,8 +5288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
r = kvm_mmu_reload(vcpu);
if (unlikely(r)) {
- kvm_x86_ops->cancel_injection(vcpu);
- goto out;
+ goto cancel_injection;
}
preempt_disable();
@@ -5322,9 +5313,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
smp_wmb();
local_irq_enable();
preempt_enable();
- kvm_x86_ops->cancel_injection(vcpu);
r = 1;
- goto out;
+ goto cancel_injection;
}
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -5388,9 +5378,16 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (unlikely(vcpu->arch.tsc_always_catchup))
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- kvm_lapic_sync_from_vapic(vcpu);
+ if (vcpu->arch.apic_attention)
+ kvm_lapic_sync_from_vapic(vcpu);
r = kvm_x86_ops->handle_exit(vcpu);
+ return r;
+
+cancel_injection:
+ kvm_x86_ops->cancel_injection(vcpu);
+ if (unlikely(vcpu->arch.apic_attention))
+ kvm_lapic_sync_from_vapic(vcpu);
out:
return r;
}
@@ -6304,7 +6301,7 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
- vfree(free->arch.lpage_info[i]);
+ kvm_kvfree(free->arch.lpage_info[i]);
free->arch.lpage_info[i] = NULL;
}
}
@@ -6323,7 +6320,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
slot->base_gfn, level) + 1;
slot->arch.lpage_info[i] =
- vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
+ kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
if (!slot->arch.lpage_info[i])
goto out_free;
@@ -6350,7 +6347,7 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
- vfree(slot->arch.lpage_info[i]);
+ kvm_kvfree(slot->arch.lpage_info[i]);
slot->arch.lpage_info[i] = NULL;
}
return -ENOMEM;
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 30f29a0020a12..3fcc000efc535 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -654,16 +654,6 @@ sclp_remove_processed(struct sccb_header *sccb)
EXPORT_SYMBOL(sclp_remove_processed);
-struct init_sccb {
- struct sccb_header header;
- u16 _reserved;
- u16 mask_length;
- sccb_mask_t receive_mask;
- sccb_mask_t send_mask;
- sccb_mask_t sclp_receive_mask;
- sccb_mask_t sclp_send_mask;
-} __attribute__((packed));
-
/* Prepare init mask request. Called while sclp_lock is locked. */
static inline void
__sclp_make_init_req(u32 receive_mask, u32 send_mask)
diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h
index 49a1bb52bc87a..d7e97ae9ef6de 100644
--- a/drivers/s390/char/sclp.h
+++ b/drivers/s390/char/sclp.h
@@ -88,6 +88,16 @@ struct sccb_header {
u16 response_code;
} __attribute__((packed));
+struct init_sccb {
+ struct sccb_header header;
+ u16 _reserved;
+ u16 mask_length;
+ sccb_mask_t receive_mask;
+ sccb_mask_t send_mask;
+ sccb_mask_t sclp_receive_mask;
+ sccb_mask_t sclp_send_mask;
+} __attribute__((packed));
+
extern u64 sclp_facilities;
#define SCLP_HAS_CHP_INFO (sclp_facilities & 0x8000000000000000ULL)
#define SCLP_HAS_CHP_RECONFIG (sclp_facilities & 0x2000000000000000ULL)
diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c
index 766cb7b19b403..71ea923c322d4 100644
--- a/drivers/s390/char/sclp_cmd.c
+++ b/drivers/s390/char/sclp_cmd.c
@@ -48,6 +48,7 @@ struct read_info_sccb {
u8 _reserved5[4096 - 112]; /* 112-4095 */
} __attribute__((packed, aligned(PAGE_SIZE)));
+static struct init_sccb __initdata early_event_mask_sccb __aligned(PAGE_SIZE);
static struct read_info_sccb __initdata early_read_info_sccb;
static int __initdata early_read_info_sccb_valid;
@@ -104,6 +105,19 @@ static void __init sclp_read_info_early(void)
}
}
+static void __init sclp_event_mask_early(void)
+{
+ struct init_sccb *sccb = &early_event_mask_sccb;
+ int rc;
+
+ do {
+ memset(sccb, 0, sizeof(*sccb));
+ sccb->header.length = sizeof(*sccb);
+ sccb->mask_length = sizeof(sccb_mask_t);
+ rc = sclp_cmd_sync_early(SCLP_CMDW_WRITE_EVENT_MASK, sccb);
+ } while (rc == -EBUSY);
+}
+
void __init sclp_facilities_detect(void)
{
struct read_info_sccb *sccb;
@@ -119,6 +133,30 @@ void __init sclp_facilities_detect(void)
rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2;
rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2;
rzm <<= 20;
+
+ sclp_event_mask_early();
+}
+
+bool __init sclp_has_linemode(void)
+{
+ struct init_sccb *sccb = &early_event_mask_sccb;
+
+ if (sccb->header.response_code != 0x20)
+ return 0;
+ if (sccb->sclp_send_mask & (EVTYP_MSG_MASK | EVTYP_PMSGCMD_MASK))
+ return 1;
+ return 0;
+}
+
+bool __init sclp_has_vt220(void)
+{
+ struct init_sccb *sccb = &early_event_mask_sccb;
+
+ if (sccb->header.response_code != 0x20)
+ return 0;
+ if (sccb->sclp_send_mask & EVTYP_VT220MSG_MASK)
+ return 1;
+ return 0;
}
unsigned long long sclp_get_rnmax(void)
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index d74e9ae6dfb3e..d3bdae49512f7 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -25,6 +25,7 @@
#include <asm/io.h>
#include <asm/kvm_para.h>
#include <asm/kvm_virtio.h>
+#include <asm/sclp.h>
#include <asm/setup.h>
#include <asm/irq.h>
@@ -468,7 +469,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count)
static int __init s390_virtio_console_init(void)
{
- if (!MACHINE_IS_KVM)
+ if (sclp_has_vt220() || sclp_has_linemode())
return -ENODEV;
return virtio_cons_early_init(early_put_chars);
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 09f2b3aa2da7e..f9784476c9267 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -111,6 +111,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+ * For ARM: IRQ: irq = (2 * vcpu_index). FIQ: irq = (2 * vcpu_index + 1).
*/
union {
__u32 irq;
@@ -617,6 +618,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_SIGNAL_MSI 77
#define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79
+#define KVM_CAP_PPC_ALLOC_HTAB 80
#ifdef KVM_CAP_IRQ_ROUTING
@@ -828,6 +830,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
/*
* ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c4464356b35b0..96aa7fb26f87b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -306,7 +306,7 @@ struct kvm {
struct hlist_head irq_ack_notifier_list;
#endif
-#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
@@ -314,13 +314,19 @@ struct kvm {
long tlbs_dirty;
};
-/* The guest did something we don't support. */
-#define pr_unimpl(vcpu, fmt, ...) \
- pr_err_ratelimited("kvm: %i: cpu%i " fmt, \
- current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__)
+#define kvm_err(fmt, ...) \
+ pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
+#define kvm_info(fmt, ...) \
+ pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
+#define kvm_debug(fmt, ...) \
+ pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
+#define kvm_pr_unimpl(fmt, ...) \
+ pr_err_ratelimited("kvm [%i]: " fmt, \
+ task_tgid_nr(current), ## __VA_ARGS__)
-#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
-#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
+/* The guest did something we don't support. */
+#define vcpu_unimpl(vcpu, fmt, ...) \
+ kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
@@ -488,6 +494,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
struct
kvm_userspace_memory_region *mem,
int user_alloc);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
@@ -535,6 +542,9 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
void kvm_free_physmem(struct kvm *kvm);
+void *kvm_kvzalloc(unsigned long size);
+void kvm_kvfree(const void *addr);
+
#ifndef __KVM_HAVE_ARCH_VM_ALLOC
static inline struct kvm *kvm_arch_alloc_vm(void)
{
@@ -771,7 +781,7 @@ struct kvm_stats_debugfs_item {
extern struct kvm_stats_debugfs_item debugfs_entries[];
extern struct dentry *kvm_debugfs_dir;
-#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
{
if (unlikely(vcpu->kvm->mmu_notifier_count))
@@ -793,7 +803,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
}
#endif
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_MAX_IRQ_ROUTES 1024
diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h
index 46e3cd8e197a5..7ef9e759f4990 100644
--- a/include/trace/events/kvm.h
+++ b/include/trace/events/kvm.h
@@ -13,7 +13,8 @@
ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN), \
ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \
ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
- ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI)
+ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \
+ ERSN(S390_UCONTROL)
TRACE_EVENT(kvm_userspace_exit,
TP_PROTO(__u32 reason, int errno),
@@ -36,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit,
__entry->errno < 0 ? -__entry->errno : __entry->reason)
);
-#if defined(__KVM_HAVE_IOAPIC)
+#if defined(__KVM_HAVE_IRQ_LINE)
TRACE_EVENT(kvm_set_irq,
TP_PROTO(unsigned int gsi, int level, int irq_source_id),
TP_ARGS(gsi, level, irq_source_id),
@@ -56,7 +57,9 @@ TRACE_EVENT(kvm_set_irq,
TP_printk("gsi %u level %d source %d",
__entry->gsi, __entry->level, __entry->irq_source_id)
);
+#endif
+#if defined(__KVM_HAVE_IOAPIC)
#define kvm_deliver_mode \
{0x0, "Fixed"}, \
{0x1, "LowPrio"}, \
diff --git a/mm/memory.c b/mm/memory.c
index 2466d1250231f..2cdadb935ed81 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -379,12 +379,14 @@ void pgd_clear_bad(pgd_t *pgd)
pgd_ERROR(*pgd);
pgd_clear(pgd);
}
+EXPORT_SYMBOL_GPL(pgd_clear_bad);
void pud_clear_bad(pud_t *pud)
{
pud_ERROR(*pud);
pud_clear(pud);
}
+EXPORT_SYMBOL_GPL(pud_clear_bad);
void pmd_clear_bad(pmd_t *pmd)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7e140683ff14d..1d338773b0c39 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -516,16 +516,32 @@ out_err_nodisable:
return ERR_PTR(r);
}
+/*
+ * Avoid using vmalloc for a small buffer.
+ * Should not be used when the size is statically known.
+ */
+void *kvm_kvzalloc(unsigned long size)
+{
+ if (size > PAGE_SIZE)
+ return vzalloc(size);
+ else
+ return kzalloc(size, GFP_KERNEL);
+}
+
+void kvm_kvfree(const void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ vfree(addr);
+ else
+ kfree(addr);
+}
+
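
kvm_kvzalloc() picks kzalloc() or vzalloc() based on size, so a buffer it returns must be released with kvm_kvfree(), which dispatches on is_vmalloc_addr(); pairing it with a bare kfree() or vfree() would only work by accident. A small usage sketch (the names are invented):

    static void *scratch;

    static int scratch_alloc(unsigned long nbytes)
    {
            scratch = kvm_kvzalloc(nbytes);   /* kzalloc or vzalloc */
            return scratch ? 0 : -ENOMEM;
    }

    static void scratch_free(void)
    {
            kvm_kvfree(scratch);              /* handles either case */
            scratch = NULL;
    }
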
static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
{
if (!memslot->dirty_bitmap)
return;
- if (2 * kvm_dirty_bitmap_bytes(memslot) > PAGE_SIZE)
- vfree(memslot->dirty_bitmap);
- else
- kfree(memslot->dirty_bitmap);
-
+ kvm_kvfree(memslot->dirty_bitmap);
memslot->dirty_bitmap = NULL;
}
@@ -617,11 +633,7 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
#ifndef CONFIG_S390
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
- if (dirty_bytes > PAGE_SIZE)
- memslot->dirty_bitmap = vzalloc(dirty_bytes);
- else
- memslot->dirty_bitmap = kzalloc(dirty_bytes, GFP_KERNEL);
-
+ memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;
@@ -2081,6 +2093,25 @@ static long kvm_vm_ioctl(struct file *filp,
break;
}
#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+ case KVM_IRQ_LINE_STATUS:
+ case KVM_IRQ_LINE: {
+ struct kvm_irq_level irq_event;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_event, argp, sizeof irq_event))
+ goto out;
+
+ r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+
+ if (ioctl == KVM_IRQ_LINE_STATUS) {
+ if (copy_to_user(argp, &irq_event, sizeof irq_event))
+ r = -EFAULT;
+ }
+
+ break;
+ }
+#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2213,7 +2244,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg)
case KVM_CAP_SIGNAL_MSI:
#endif
return 1;
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
+#ifdef KVM_CAP_IRQ_ROUTING
case KVM_CAP_IRQ_ROUTING:
return KVM_MAX_IRQ_ROUTES;
#endif
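
With the x86 handler factored into kvm_vm_ioctl_irq_line() and the dispatch made generic, userspace drives an in-kernel irqchip line the same way on any architecture that defines __KVM_HAVE_IRQ_LINE. A hedged userspace sketch (vm_fd and the helper are hypothetical):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Pulse a GSI high then low. With KVM_IRQ_LINE_STATUS the kernel
     * writes the kvm_set_irq() result back into the status field. */
    static int pulse_irq(int vm_fd, unsigned int gsi)
    {
            struct kvm_irq_level irq = { .irq = gsi, .level = 1 };

            if (ioctl(vm_fd, KVM_IRQ_LINE_STATUS, &irq) < 0)
                    return -1;
            irq.level = 0;
            return ioctl(vm_fd, KVM_IRQ_LINE, &irq);
    }
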