aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2024-05-02 14:28:07 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2024-05-02 14:28:07 +1000
commit171334a1eb7e24a08ab1ca1aff1132f900094099 (patch)
tree3c5030cae5bda702b310f4115347a6326fe3b19d
parent8dc23c0c6de36e2b67f853a11c52cc939f686dd7 (diff)
parent5ef2f3d4e747c7851678ad2b70e37be886a8c9eb (diff)
downloadlinux-next-history-171334a1eb7e24a08ab1ca1aff1132f900094099.tar.gz
Merge branch 'riscv_kvm_next' of https://github.com/kvm-riscv/linux.git
Notice: this object is not reachable from any branch.
Notice: this object is not reachable from any branch.
-rw-r--r--arch/riscv/include/asm/csr.h5
-rw-r--r--arch/riscv/include/asm/kvm_host.h21
-rw-r--r--arch/riscv/include/asm/kvm_vcpu_pmu.h16
-rw-r--r--arch/riscv/include/asm/sbi.h38
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h1
-rw-r--r--arch/riscv/kernel/paravirt.c6
-rw-r--r--arch/riscv/kvm/aia.c5
-rw-r--r--arch/riscv/kvm/main.c18
-rw-r--r--arch/riscv/kvm/vcpu.c85
-rw-r--r--arch/riscv/kvm/vcpu_exit.c4
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c6
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c260
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c7
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c42
-rw-r--r--arch/riscv/kvm/vcpu_sbi_pmu.c17
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c4
-rw-r--r--arch/riscv/kvm/vm.c1
-rw-r--r--drivers/perf/riscv_pmu.c3
-rw-r--r--drivers/perf/riscv_pmu_sbi.c316
-rw-r--r--include/linux/perf/riscv_pmu.h8
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h49
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h141
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h1
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c12
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c82
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c4
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c681
-rw-r--r--tools/testing/selftests/kvm/steal_time.c4
30 files changed, 1674 insertions, 167 deletions
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2468c55933cd0d..25966995da04e0 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -168,7 +168,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
- (_AC(1, UL) << IRQ_S_EXT))
+ (_AC(1, UL) << IRQ_S_EXT) | \
+ (_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@@ -281,7 +282,7 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
-#define CSR_SSCOUNTOVF 0xda0
+#define CSR_SCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index 484d04a92fa6b7..d962812785868b 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -43,6 +43,17 @@
KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6)
+#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \
+ BIT(EXC_BREAKPOINT) | \
+ BIT(EXC_SYSCALL) | \
+ BIT(EXC_INST_PAGE_FAULT) | \
+ BIT(EXC_LOAD_PAGE_FAULT) | \
+ BIT(EXC_STORE_PAGE_FAULT))
+
+#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \
+ BIT(IRQ_VS_TIMER) | \
+ BIT(IRQ_VS_EXT))
+
enum kvm_riscv_hfence_type {
KVM_RISCV_HFENCE_UNKNOWN = 0,
KVM_RISCV_HFENCE_GVMA_VMID_GPA,
@@ -169,6 +180,7 @@ struct kvm_vcpu_csr {
struct kvm_vcpu_config {
u64 henvcfg;
u64 hstateen0;
+ unsigned long hedeleg;
};
struct kvm_vcpu_smstateen_csr {
@@ -211,6 +223,7 @@ struct kvm_vcpu_arch {
/* CPU context upon Guest VCPU reset */
struct kvm_cpu_context guest_reset_context;
+ spinlock_t reset_cntx_lock;
/* CPU CSR context upon Guest VCPU reset */
struct kvm_vcpu_csr guest_reset_csr;
@@ -252,8 +265,9 @@ struct kvm_vcpu_arch {
/* Cache pages needed to program page tables with spinlock held */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* VCPU power-off state */
- bool power_off;
+ /* VCPU power state */
+ struct kvm_mp_state mp_state;
+ spinlock_t mp_state_lock;
/* Don't run the VCPU (blocked) */
bool pause;
@@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu);
bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask);
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu);
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index 395518a1664e00..fa0f535bbbf024 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64);
struct kvm_fw_event {
/* Current value of the event */
- unsigned long value;
+ u64 value;
/* Event monitoring status */
bool started;
@@ -36,6 +36,7 @@ struct kvm_pmc {
bool started;
/* Monitoring event ID */
unsigned long event_idx;
+ struct kvm_vcpu *vcpu;
};
/* PMU data structure per vcpu */
@@ -50,6 +51,12 @@ struct kvm_pmu {
bool init_done;
/* Bit map of all the virtual counter used */
DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ /* Bit map of all the virtual counter overflown */
+ DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
+ /* The address of the counter snapshot area (guest physical address) */
+ gpa_t snapshot_addr;
+ /* The actual data of the snapshot */
+ struct riscv_pmu_snapshot_data *sdata;
};
#define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context)
@@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long ctr_mask, unsigned long flags,
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata);
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata);
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long flags,
+ struct kvm_vcpu_sbi_return *retdata);
void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu);
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 6e68f8dff76bc6..112a0a0d9f46e9 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid {
SBI_EXT_PMU_COUNTER_START,
SBI_EXT_PMU_COUNTER_STOP,
SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
};
union sbi_pmu_ctr_info {
@@ -147,6 +149,13 @@ union sbi_pmu_ctr_info {
};
};
+/* Data structure to contain the pmu snapshot data */
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
#define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0)
#define RISCV_PMU_RAW_EVENT_IDX 0x20000
@@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type {
#define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF
/* Flags defined for config matching function */
-#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0)
-#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1)
-#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2)
-#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3)
-#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4)
-#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5)
-#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6)
-#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7)
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
/* Flags defined for counter start function */
-#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0)
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
/* Flags defined for counter stop function */
-#define SBI_PMU_STOP_FLAG_RESET (1 << 0)
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
enum sbi_ext_dbcn_fid {
SBI_EXT_DBCN_CONSOLE_WRITE = 0,
@@ -266,7 +277,7 @@ struct sbi_sta_struct {
u8 pad[47];
} __packed;
-#define SBI_STA_SHMEM_DISABLE -1
+#define SBI_SHMEM_DISABLE -1
/* SBI spec version fields */
#define SBI_SPEC_VERSION_DEFAULT 0x1
@@ -284,6 +295,7 @@ struct sbi_sta_struct {
#define SBI_ERR_ALREADY_AVAILABLE -6
#define SBI_ERR_ALREADY_STARTED -7
#define SBI_ERR_ALREADY_STOPPED -8
+#define SBI_ERR_NO_SHMEM -9
extern unsigned long sbi_spec_version;
struct sbiret {
@@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void)
static inline unsigned long sbi_mk_version(unsigned long major,
unsigned long minor)
{
- return ((major & SBI_SPEC_VERSION_MAJOR_MASK) <<
- SBI_SPEC_VERSION_MAJOR_SHIFT) | minor;
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
}
int sbi_err_map_linux_errno(int err);
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index b1c503c2959c34..e878e7cc39784a 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZFA,
KVM_RISCV_ISA_EXT_ZTSO,
KVM_RISCV_ISA_EXT_ZACAS,
+ KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c
index 0d6225fd3194e1..fa6b0339a65dea 100644
--- a/arch/riscv/kernel/paravirt.c
+++ b/arch/riscv/kernel/paravirt.c
@@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi,
ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM,
lo, hi, flags, 0, 0, 0);
if (ret.error) {
- if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE)
+ if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE)
pr_warn("Failed to disable steal-time shmem");
else
pr_warn("Failed to set steal-time shmem");
@@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu)
static int pv_time_cpu_down_prepare(unsigned int cpu)
{
- return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE,
- SBI_STA_SHMEM_DISABLE, 0);
+ return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0);
}
static u64 pv_time_steal_clock(int cpu)
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index a944294f6f23a7..0f0a9d11bb5ff1 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
enable_percpu_irq(hgei_parent_irq,
irq_get_trigger_type(hgei_parent_irq));
csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+ /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
}
void kvm_riscv_aia_disable(void)
@@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
return;
hgctrl = get_cpu_ptr(&aia_hgei);
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
/* Disable per-CPU SGEI interrupt */
csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
disable_percpu_irq(hgei_parent_irq);
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 225a435d9c9a9c..bab2ec34cd8766 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp,
int kvm_arch_hardware_enable(void)
{
- unsigned long hideleg, hedeleg;
-
- hedeleg = 0;
- hedeleg |= (1UL << EXC_INST_MISALIGNED);
- hedeleg |= (1UL << EXC_BREAKPOINT);
- hedeleg |= (1UL << EXC_SYSCALL);
- hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
- hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
- hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
- csr_write(CSR_HEDELEG, hedeleg);
-
- hideleg = 0;
- hideleg |= (1UL << IRQ_VS_SOFT);
- hideleg |= (1UL << IRQ_VS_TIMER);
- hideleg |= (1UL << IRQ_VS_EXT);
- csr_write(CSR_HIDELEG, hideleg);
+ csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
+ csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b5ca9f2e98acd2..17e21df36cc1ec 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
memcpy(csr, reset_csr, sizeof(*csr));
+ spin_lock(&vcpu->arch.reset_cntx_lock);
memcpy(cntx, reset_cntx, sizeof(*cntx));
+ spin_unlock(&vcpu->arch.reset_cntx_lock);
kvm_riscv_vcpu_fp_reset(vcpu);
@@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cntx;
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+ spin_lock_init(&vcpu->arch.mp_state_lock);
+
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
@@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
spin_lock_init(&vcpu->arch.hfence_lock);
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+ spin_lock_init(&vcpu->arch.reset_cntx_lock);
+
+ spin_lock(&vcpu->arch.reset_cntx_lock);
cntx = &vcpu->arch.guest_reset_context;
cntx->sstatus = SR_SPP | SR_SPIE;
cntx->hstatus = 0;
cntx->hstatus |= HSTATUS_VTW;
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ spin_unlock(&vcpu->arch.reset_cntx_lock);
if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
return -ENOMEM;
@@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
- !vcpu->arch.power_off && !vcpu->arch.pause);
+ !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
}
}
+ /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
+ if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
+ if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
+ !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
+ clear_bit(IRQ_PMU_OVF, v->irqs_pending);
+ }
+
/* Sync-up AIA high interrupts */
kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
@@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
set_bit(irq, vcpu->arch.irqs_pending);
@@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
/*
- * We only allow VS-mode software, timer, and external
+ * We only allow VS-mode software, timer, counter overflow and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
clear_bit(irq, vcpu->arch.irqs_pending);
@@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}
-void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
-void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = false;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
}
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_on(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_riscv_vcpu_power_off(vcpu);
+ __kvm_riscv_vcpu_power_off(vcpu);
break;
default:
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- /* TODO; To be implemented later. */
- return -EINVAL;
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
+ } else {
+ vcpu->guest_debug = 0;
+ vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
+ }
+
+ return 0;
}
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
@@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
if (riscv_isa_extension_available(isa, SMSTATEEN))
cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
}
+
+ cfg->hedeleg = KVM_HEDELEG_DEFAULT;
+ if (vcpu->guest_debug)
+ cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_VSEPC, csr->vsepc);
csr_write(CSR_VSCAUSE, csr->vscause);
csr_write(CSR_VSTVAL, csr->vstval);
+ csr_write(CSR_HEDELEG, cfg->hedeleg);
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
csr_write(CSR_HENVCFG, cfg->henvcfg);
@@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) && (!vcpu->arch.pause),
+ (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
kvm_vcpu_srcu_read_lock(vcpu);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/*
* Awaken to handle a signal, request to
* sleep again later.
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 2415722c01b8e9..5761f95abb60be 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
break;
+ case EXC_BREAKPOINT:
+ run->exit_reason = KVM_EXIT_DEBUG;
+ ret = 0;
+ break;
default:
break;
}
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index 994adc26db4b10..c676275ea0a017 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
switch (ext) {
case KVM_RISCV_ISA_EXT_H:
return false;
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ /* Sscofpmf depends on interrupt filtering defined in ssaia */
+ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
case KVM_RISCV_ISA_EXT_V:
return riscv_v_vstate_ctrl_user_allowed();
default:
@@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
+ /* There is not architectural config bit to disable sscofpmf completely */
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 86391a5061dda9..04db1f993c4759 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -14,6 +14,7 @@
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
+#include <asm/sbi.h>
#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
@@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
u64 sample_period;
if (!pmc->counter_val)
- sample_period = counter_val_mask + 1;
+ sample_period = counter_val_mask;
else
sample_period = (-pmc->counter_val) & counter_val_mask;
@@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
+static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ unsigned long *out_val)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int fevent_code;
+
+ if (!IS_ENABLED(CONFIG_32BIT)) {
+ pr_warn("%s: should be invoked for only RV32\n", __func__);
+ return -EINVAL;
+ }
+
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld]during read\n", cidx);
+ return -EINVAL;
+ }
+
+ pmc = &kvpmu->pmc[cidx];
+
+ if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
+ return -EINVAL;
+
+ fevent_code = get_event_code(pmc->event_idx);
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+
+ *out_val = pmc->counter_val >> 32;
+
+ return 0;
+}
+
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
unsigned long *out_val)
{
@@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
u64 enabled, running;
int fevent_code;
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld] during read\n", cidx);
+ return -EINVAL;
+ }
+
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
@@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
return 0;
}
-static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
- unsigned long flags, unsigned long eidx, unsigned long evtdata)
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_vcpu *vcpu = pmc->vcpu;
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+ u64 period;
+
+ /*
+ * Stop the event counting by directly accessing the perf_event.
+ * Otherwise, this needs to deferred via a workqueue.
+ * That will introduce skew in the counter value because the actual
+ * physical counter would start after returning from this function.
+ * It will be stopped again once the workqueue is scheduled
+ */
+ rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * The hw counter would start automatically when this function returns.
+ * Thus, the host may continue to interrupt and inject it to the guest
+ * even without the guest configuring the next event. Depending on the hardware
+ * the host may have some sluggishness only if privilege mode filtering is not
+ * available. In an ideal world, where qemu is not the only capable hardware,
+ * this can be removed.
+ * FYI: ARM64 does this way while x86 doesn't do anything as such.
+ * TODO: Should we keep it for RISC-V ?
+ */
+ period = -(local64_read(&perf_event->count));
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
+
+ set_bit(pmc->idx, kvpmu->pmc_overflown);
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+ rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
+static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
+ unsigned long flags, unsigned long eidx,
+ unsigned long evtdata)
{
struct perf_event *event;
@@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr
*/
attr->sample_period = kvm_pmu_get_sample_period(pmc);
- event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+ event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
@@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
return ret;
}
+static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+
+ if (kvpmu->sdata) {
+ if (kvpmu->snapshot_addr != INVALID_GPA) {
+ memset(kvpmu->sdata, 0, snapshot_area_size);
+ kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr,
+ kvpmu->sdata, snapshot_area_size);
+ } else {
+ pr_warn("snapshot address invalid\n");
+ }
+ kfree(kvpmu->sdata);
+ kvpmu->sdata = NULL;
+ }
+ kvpmu->snapshot_addr = INVALID_GPA;
+}
+
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long flags,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+ int sbiret = 0;
+ gpa_t saddr;
+ unsigned long hva;
+ bool writable;
+
+ if (!kvpmu || flags) {
+ sbiret = SBI_ERR_INVALID_PARAM;
+ goto out;
+ }
+
+ if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
+ kvm_pmu_clear_snapshot_area(vcpu);
+ return 0;
+ }
+
+ saddr = saddr_low;
+
+ if (saddr_high != 0) {
+ if (IS_ENABLED(CONFIG_32BIT))
+ saddr |= ((gpa_t)saddr_high << 32);
+ else
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable) {
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
+ if (!kvpmu->sdata)
+ return -ENOMEM;
+
+ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
+ kfree(kvpmu->sdata);
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ kvpmu->snapshot_addr = saddr;
+
+out:
+ retdata->err_val = sbiret;
+
+ return 0;
+}
+
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
struct kvm_vcpu_sbi_return *retdata)
{
@@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
int i, pmc_index, sbiret = 0;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set) {
+ if (kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+ if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data))) {
+ pr_warn("Unable to read snapshot shared memory while starting counters\n");
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+ }
/* Start the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
continue;
+ /* The guest started the counter again. Reset the overflow status */
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
pmc = &kvpmu->pmc[pmc_index];
- if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
+ if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
pmc->counter_val = ival;
+ } else if (snap_flag_set) {
+ /* The counter index in the snapshot are relative to the counter base */
+ pmc->counter_val = kvpmu->sdata->ctr_values[i];
+ }
+
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
fevent_code = get_event_code(pmc->event_idx);
if (fevent_code >= SBI_PMU_FW_MAX) {
@@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
u64 enabled, running;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+ bool shmem_needs_update = false;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+
/* Stop the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
@@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
sbiret = SBI_ERR_ALREADY_STOPPED;
}
- if (flags & SBI_PMU_STOP_FLAG_RESET) {
- /* Relase the counter if this is a reset request */
- pmc->counter_val += perf_event_read_value(pmc->perf_event,
- &enabled, &running);
+ if (flags & SBI_PMU_STOP_FLAG_RESET)
+ /* Release the counter if this is a reset request */
kvm_pmu_release_perf_event(pmc);
- }
} else {
sbiret = SBI_ERR_INVALID_PARAM;
}
+
+ if (snap_flag_set && !sbiret) {
+ if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+ else if (pmc->perf_event)
+ pmc->counter_val += perf_event_read_value(pmc->perf_event,
+ &enabled, &running);
+ /*
+ * The counter and overflow indicies in the snapshot region are w.r.to
+ * cbase. Modify the set bit in the counter mask instead of the pmc_index
+ * which indicates the absolute counter index.
+ */
+ if (test_bit(pmc_index, kvpmu->pmc_overflown))
+ kvpmu->sdata->ctr_overflow_mask |= BIT(i);
+ kvpmu->sdata->ctr_values[i] = pmc->counter_val;
+ shmem_needs_update = true;
+ }
+
if (flags & SBI_PMU_STOP_FLAG_RESET) {
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
clear_bit(pmc_index, kvpmu->pmc_in_use);
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
+ if (snap_flag_set) {
+ /*
+ * Only clear the given counter as the caller is responsible to
+ * validate both the overflow mask and configured counters.
+ */
+ kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
+ shmem_needs_update = true;
+ }
}
}
+ if (shmem_needs_update)
+ kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data));
+
out:
retdata->err_val = sbiret;
@@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata)
{
- int ctr_idx, ret, sbiret = 0;
+ int ctr_idx, sbiret = 0;
+ long ret;
bool is_fevent;
unsigned long event_code;
u32 etype = kvm_pmu_get_perf_event_type(eidx);
@@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
kvpmu->fw_event[event_code].started = true;
} else {
ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
- if (ret)
- return ret;
+ if (ret) {
+ sbiret = SBI_ERR_NOT_SUPPORTED;
+ goto out;
+ }
}
set_bit(ctr_idx, kvpmu->pmc_in_use);
@@ -530,7 +740,19 @@ out:
return 0;
}
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ int ret;
+
+ ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
+ if (ret == -EINVAL)
+ retdata->err_val = SBI_ERR_INVALID_PARAM;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata)
{
int ret;
@@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvpmu->snapshot_addr = INVALID_GPA;
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
@@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc = &kvpmu->pmc[i];
pmc->idx = i;
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+ pmc->vcpu = vcpu;
if (i < kvpmu->num_hw_ctrs) {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
if (i < 3)
@@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc->cinfo.csr = CSR_CYCLE + i;
} else {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
- pmc->cinfo.width = BITS_PER_LONG - 1;
+ pmc->cinfo.width = 63;
}
}
@@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
if (!kvpmu)
return;
- for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+ for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
pmc = &kvpmu->pmc[i];
pmc->counter_val = 0;
kvm_pmu_release_perf_event(pmc);
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
}
- bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 72a2ffb8dcd158..62f409d4176e41 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
unsigned long i;
struct kvm_vcpu *tmp;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&vcpu->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&run->system_event, 0, sizeof(run->system_event));
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
index 7dca0e9381d9a5..dce667f4b6ab08 100644
--- a/arch/riscv/kvm/vcpu_sbi_hsm.c
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm_vcpu *target_vcpu;
unsigned long target_vcpuid = cp->a0;
+ int ret = 0;
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
- return SBI_ERR_ALREADY_AVAILABLE;
+ spin_lock(&target_vcpu->arch.mp_state_lock);
+
+ if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
+ ret = SBI_ERR_ALREADY_AVAILABLE;
+ goto out;
+ }
+
+ spin_lock(&target_vcpu->arch.reset_cntx_lock);
reset_cntx = &target_vcpu->arch.guest_reset_context;
/* start address */
reset_cntx->sepc = cp->a1;
@@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
reset_cntx->a0 = target_vcpuid;
/* private data passed from kernel */
reset_cntx->a1 = cp->a2;
+ spin_unlock(&target_vcpu->arch.reset_cntx_lock);
+
kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
- kvm_riscv_vcpu_power_on(target_vcpu);
+ __kvm_riscv_vcpu_power_on(target_vcpu);
- return 0;
+out:
+ spin_unlock(&target_vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.power_off)
- return SBI_ERR_FAILURE;
+ int ret = 0;
- kvm_riscv_vcpu_power_off(vcpu);
+ spin_lock(&vcpu->arch.mp_state_lock);
- return 0;
+ if (kvm_riscv_vcpu_stopped(vcpu)) {
+ ret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ __kvm_riscv_vcpu_power_off(vcpu);
+
+out:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
@@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
+ if (!kvm_riscv_vcpu_stopped(target_vcpu))
return SBI_HSM_STATE_STARTED;
else if (vcpu->stat.generic.blocking)
return SBI_HSM_STATE_SUSPENDED;
@@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
{
int ret = 0;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
- struct kvm *kvm = vcpu->kvm;
unsigned long funcid = cp->a6;
switch (funcid) {
case SBI_EXT_HSM_HART_START:
- mutex_lock(&kvm->lock);
ret = kvm_sbi_hsm_vcpu_start(vcpu);
- mutex_unlock(&kvm->lock);
break;
case SBI_EXT_HSM_HART_STOP:
ret = kvm_sbi_hsm_vcpu_stop(vcpu);
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
index 7eca72df2cbd61..e4be34e03e8384 100644
--- a/arch/riscv/kvm/vcpu_sbi_pmu.c
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
#endif
/*
* This can fail if perf core framework fails to create an event.
- * Forward the error to userspace because it's an error which
- * happened within the host kernel. The other option would be
- * to convert to an SBI error and forward to the guest.
+ * No need to forward the error to userspace and exit the guest.
+ * The operation can continue without profiling. Forward the
+ * appropriate SBI error to the guest.
*/
ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1,
cp->a2, cp->a3, temp, retdata);
@@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
case SBI_EXT_PMU_COUNTER_FW_READ:
- ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata);
+ break;
+ case SBI_EXT_PMU_COUNTER_FW_READ_HI:
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata);
+ else
+ retdata->out_val = 0;
+ break;
+ case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
+ ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index d8cf9ca28c616e..5f35427114c1da 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
if (flags != 0)
return SBI_ERR_INVALID_PARAM;
- if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
- shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+ if (shmem_phys_lo == SBI_SHMEM_DISABLE &&
+ shmem_phys_hi == SBI_SHMEM_DISABLE) {
vcpu->arch.sta.shmem = INVALID_GPA;
return 0;
}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index ce58bc48e5b87b..7396b8654f454b 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index b4efdddb2ad91f..78c490e0505af3 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -191,8 +191,6 @@ void riscv_pmu_stop(struct perf_event *event, int flags)
struct hw_perf_event *hwc = &event->hw;
struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
- WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
-
if (!(hwc->state & PERF_HES_STOPPED)) {
if (rvpmu->ctr_stop) {
rvpmu->ctr_stop(event, 0);
@@ -408,6 +406,7 @@ struct riscv_pmu *riscv_pmu_alloc(void)
cpuc->n_events = 0;
for (i = 0; i < RISCV_MAX_COUNTERS; i++)
cpuc->events[i] = NULL;
+ cpuc->snapshot_addr = NULL;
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 82636273d72659..a2e4005e1fd01c 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -27,7 +27,7 @@
#define ALT_SBI_PMU_OVERFLOW(__ovl) \
asm volatile(ALTERNATIVE_2( \
- "csrr %0, " __stringify(CSR_SSCOUNTOVF), \
+ "csrr %0, " __stringify(CSR_SCOUNTOVF), \
"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \
THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \
CONFIG_ERRATA_THEAD_PMU, \
@@ -57,6 +57,11 @@ asm volatile(ALTERNATIVE( \
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");
+static bool sbi_v2_available;
+static DEFINE_STATIC_KEY_FALSE(sbi_pmu_snapshot_available);
+#define sbi_pmu_snapshot_available() \
+ static_branch_unlikely(&sbi_pmu_snapshot_available)
+
static struct attribute *riscv_arch_formats_attr[] = {
&format_attr_event.attr,
&format_attr_firmware.attr,
@@ -384,7 +389,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
cmask = 1;
} else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
- cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+ cmask = BIT(CSR_INSTRET - CSR_CYCLE);
}
}
@@ -506,24 +511,126 @@ static int pmu_sbi_event_map(struct perf_event *event, u64 *econfig)
return ret;
}
+static void pmu_sbi_snapshot_free(struct riscv_pmu *pmu)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ if (!cpu_hw_evt->snapshot_addr)
+ continue;
+
+ free_page((unsigned long)cpu_hw_evt->snapshot_addr);
+ cpu_hw_evt->snapshot_addr = NULL;
+ cpu_hw_evt->snapshot_addr_phys = 0;
+ }
+}
+
+static int pmu_sbi_snapshot_alloc(struct riscv_pmu *pmu)
+{
+ int cpu;
+ struct page *snapshot_page;
+
+ for_each_possible_cpu(cpu) {
+ struct cpu_hw_events *cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+
+ snapshot_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!snapshot_page) {
+ pmu_sbi_snapshot_free(pmu);
+ return -ENOMEM;
+ }
+ cpu_hw_evt->snapshot_addr = page_to_virt(snapshot_page);
+ cpu_hw_evt->snapshot_addr_phys = page_to_phys(snapshot_page);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_snapshot_disable(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, SBI_SHMEM_DISABLE,
+ SBI_SHMEM_DISABLE, 0, 0, 0, 0);
+ if (ret.error) {
+ pr_warn("failed to disable snapshot shared memory\n");
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
+{
+ struct cpu_hw_events *cpu_hw_evt;
+ struct sbiret ret = {0};
+
+ cpu_hw_evt = per_cpu_ptr(pmu->hw_events, cpu);
+ if (!cpu_hw_evt->snapshot_addr_phys)
+ return -EINVAL;
+
+ if (cpu_hw_evt->snapshot_set_done)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys,
+ (u64)(cpu_hw_evt->snapshot_addr_phys) >> 32, 0, 0, 0, 0);
+ else
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ cpu_hw_evt->snapshot_addr_phys, 0, 0, 0, 0, 0);
+
+ /* Free up the snapshot area memory and fall back to SBI PMU calls without snapshot */
+ if (ret.error) {
+ if (ret.error != SBI_ERR_NOT_SUPPORTED)
+ pr_warn("pmu snapshot setup failed with error %ld\n", ret.error);
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+ cpu_hw_evt->snapshot_set_done = true;
+
+ return 0;
+}
+
static u64 pmu_sbi_ctr_read(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
struct sbiret ret;
- union sbi_pmu_ctr_info info;
u64 val = 0;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
+
+ /* Read the value from the shared memory directly only if counter is stopped */
+ if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
+ val = sdata->ctr_values[idx];
+ return val;
+ }
if (pmu_sbi_is_fw_event(event)) {
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ,
hwc->idx, 0, 0, 0, 0, 0);
- if (!ret.error)
- val = ret.value;
+ if (ret.error)
+ return 0;
+
+ val = ret.value;
+ if (IS_ENABLED(CONFIG_32BIT) && sbi_v2_available && info.width >= 32) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ hwc->idx, 0, 0, 0, 0, 0);
+ if (!ret.error)
+ val |= ((u64)ret.value << 32);
+ else
+ WARN_ONCE(1, "Unable to read upper 32 bits of firmware counter error: %ld\n",
+ ret.error);
+ }
} else {
- info = pmu_ctr_list[idx];
val = riscv_pmu_ctr_read_csr(info.csr);
if (IS_ENABLED(CONFIG_32BIT))
- val = ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 31 | val;
+ val |= ((u64)riscv_pmu_ctr_read_csr(info.csr + 0x80)) << 32;
}
return val;
@@ -553,6 +660,7 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
struct hw_perf_event *hwc = &event->hw;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+ /* There is no benefit setting SNAPSHOT FLAG for a single counter */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx,
1, flag, ival, ival >> 32, 0);
@@ -573,16 +681,36 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
{
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ struct riscv_pmu *pmu = to_riscv_pmu(event->pmu);
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if ((hwc->flags & PERF_EVENT_FLAG_USER_ACCESS) &&
(hwc->flags & PERF_EVENT_FLAG_USER_READ_CNT))
pmu_sbi_reset_scounteren((void *)event);
+ if (sbi_pmu_snapshot_available())
+ flag |= SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
- if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
- flag != SBI_PMU_STOP_FLAG_RESET)
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /*
+ * The counter snapshot is based on the index base specified by hwc->idx.
+ * The actual counter value is updated in shared memory at index 0 when counter
+ * mask is 0x01. To ensure accurate counter values, it's necessary to transfer
+ * the counter value to shared memory. However, if hwc->idx is zero, the counter
+ * value is already correctly updated in shared memory, requiring no further
+ * adjustment.
+ */
+ if (hwc->idx > 0) {
+ sdata->ctr_values[hwc->idx] = sdata->ctr_values[0];
+ sdata->ctr_values[0] = 0;
+ }
+ } else if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
+ flag != SBI_PMU_STOP_FLAG_RESET) {
pr_err("Stopping counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+ }
}
static int pmu_sbi_find_num_ctrs(void)
@@ -640,10 +768,39 @@ static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
{
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+ unsigned long flag = 0;
+ int i, idx;
+ struct sbiret ret;
+ u64 temp_ctr_overflow_mask = 0;
+
+ if (sbi_pmu_snapshot_available())
+ flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+
+ /* Reset the shadow copy to avoid save/restore any value from previous overflow */
+ memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* No need to check the error here as we can't do anything about the error */
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ if (!ret.error && sbi_pmu_snapshot_available()) {
+ /* Save the counter values to avoid clobbering */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
+ sdata->ctr_values[idx];
+ /* Save the overflow mask to avoid clobbering */
+ temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);
+ }
+ }
- /* No need to check the error here as we can't do anything about the error */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0,
- cpu_hw_evt->used_hw_ctrs[0], 0, 0, 0, 0);
+ /* Restore the counter values to the shared memory for used hw counters */
+ if (sbi_pmu_snapshot_available()) {
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
+ sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
+ if (temp_ctr_overflow_mask)
+ sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
+ }
}
/*
@@ -652,11 +809,10 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
* while the overflowed counters need to be started with updated initialization
* value.
*/
-static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
- unsigned long ctr_ovf_mask)
+static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
{
- int idx = 0;
- struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+ int idx = 0, i;
struct perf_event *event;
unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
unsigned long ctr_start_mask = 0;
@@ -664,11 +820,12 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
struct hw_perf_event *hwc;
u64 init_val = 0;
- ctr_start_mask = cpu_hw_evt->used_hw_ctrs[0] & ~ctr_ovf_mask;
-
- /* Start all the counters that did not overflow in a single shot */
- sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
- 0, 0, 0, 0);
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ ctr_start_mask = cpu_hw_evt->used_hw_ctrs[i] & ~ctr_ovf_mask;
+ /* Start all the counters that did not overflow in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, i * BITS_PER_LONG, ctr_start_mask,
+ 0, 0, 0, 0);
+ }
/* Reinitialize and start all the counter that overflowed */
while (ctr_ovf_mask) {
@@ -691,6 +848,52 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
}
}
+static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
+ u64 ctr_ovf_mask)
+{
+ int i, idx = 0;
+ struct perf_event *event;
+ unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
+ u64 max_period, init_val = 0;
+ struct hw_perf_event *hwc;
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
+
+ for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS) {
+ if (ctr_ovf_mask & BIT(idx)) {
+ event = cpu_hw_evt->events[idx];
+ hwc = &event->hw;
+ max_period = riscv_pmu_ctr_get_width_mask(event);
+ init_val = local64_read(&hwc->prev_count) & max_period;
+ cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
+ }
+ /*
+ * We do not need to update the non-overflow counters the previous
+ * value should have been there already.
+ */
+ }
+
+ for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+ /* Restore the counter values to relative indices for used hw counters */
+ for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+ sdata->ctr_values[idx] =
+ cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
+ /* Start all the counters in a single shot */
+ sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
+ cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
+ }
+}
+
+static void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+ u64 ctr_ovf_mask)
+{
+ struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+
+ if (sbi_pmu_snapshot_available())
+ pmu_sbi_start_ovf_ctrs_snapshot(cpu_hw_evt, ctr_ovf_mask);
+ else
+ pmu_sbi_start_ovf_ctrs_sbi(cpu_hw_evt, ctr_ovf_mask);
+}
+
static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
{
struct perf_sample_data data;
@@ -700,10 +903,11 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
int lidx, hidx, fidx;
struct riscv_pmu *pmu;
struct perf_event *event;
- unsigned long overflow;
- unsigned long overflowed_ctrs = 0;
+ u64 overflow;
+ u64 overflowed_ctrs = 0;
struct cpu_hw_events *cpu_hw_evt = dev;
u64 start_clock = sched_clock();
+ struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
if (WARN_ON_ONCE(!cpu_hw_evt))
return IRQ_NONE;
@@ -725,7 +929,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
pmu_sbi_stop_hw_ctrs(pmu);
/* Overflow status register should only be read after counter are stopped */
- ALT_SBI_PMU_OVERFLOW(overflow);
+ if (sbi_pmu_snapshot_available())
+ overflow = sdata->ctr_overflow_mask;
+ else
+ ALT_SBI_PMU_OVERFLOW(overflow);
/*
* Overflow interrupt pending bit should only be cleared after stopping
@@ -751,9 +958,14 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
if (!info || info->type != SBI_PMU_CTR_TYPE_HW)
continue;
- /* compute hardware counter index */
- hidx = info->csr - CSR_CYCLE;
- /* check if the corresponding bit is set in sscountovf */
+ if (sbi_pmu_snapshot_available())
+ /* SBI implementation already updated the logical indicies */
+ hidx = lidx;
+ else
+ /* compute hardware counter index */
+ hidx = info->csr - CSR_CYCLE;
+
+ /* check if the corresponding bit is set in sscountovf or overflow mask in shmem */
if (!(overflow & BIT(hidx)))
continue;
@@ -763,7 +975,10 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
overflowed_ctrs |= BIT(lidx);
hw_evt = &event->hw;
+ /* Update the event states here so that we know the state while reading */
+ hw_evt->state |= PERF_HES_STOPPED;
riscv_pmu_event_update(event);
+ hw_evt->state |= PERF_HES_UPTODATE;
perf_sample_data_init(&data, 0, hw_evt->last_period);
if (riscv_pmu_event_set_period(event)) {
/*
@@ -776,6 +991,8 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
*/
perf_event_overflow(event, &data, regs);
}
+ /* Reset the state as we are going to start the counter after the loop */
+ hw_evt->state = 0;
}
pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
@@ -807,6 +1024,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
}
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_setup(pmu, cpu);
+
return 0;
}
@@ -819,6 +1039,9 @@ static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
/* Disable all counters access for user mode now */
csr_write(CSR_SCOUNTEREN, 0x0);
+ if (sbi_pmu_snapshot_available())
+ return pmu_sbi_snapshot_disable();
+
return 0;
}
@@ -927,6 +1150,12 @@ static inline void riscv_pm_pmu_unregister(struct riscv_pmu *pmu) { }
static void riscv_pmu_destroy(struct riscv_pmu *pmu)
{
+ if (sbi_v2_available) {
+ if (sbi_pmu_snapshot_available()) {
+ pmu_sbi_snapshot_disable();
+ pmu_sbi_snapshot_free(pmu);
+ }
+ }
riscv_pm_pmu_unregister(pmu);
cpuhp_state_remove_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
}
@@ -1094,10 +1323,6 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
pmu->event_unmapped = pmu_sbi_event_unmapped;
pmu->csr_index = pmu_sbi_csr_index;
- ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
- if (ret)
- return ret;
-
ret = riscv_pm_pmu_register(pmu);
if (ret)
goto out_unregister;
@@ -1106,8 +1331,34 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
if (ret)
goto out_unregister;
+ /* SBI PMU Snapsphot is only available in SBI v2.0 */
+ if (sbi_v2_available) {
+ ret = pmu_sbi_snapshot_alloc(pmu);
+ if (ret)
+ goto out_unregister;
+
+ ret = pmu_sbi_snapshot_setup(pmu, smp_processor_id());
+ if (ret) {
+ /* Snapshot is an optional feature. Continue if not available */
+ pmu_sbi_snapshot_free(pmu);
+ } else {
+ pr_info("SBI PMU snapshot detected\n");
+ /*
+ * We enable it once here for the boot cpu. If snapshot shmem setup
+ * fails during cpu hotplug process, it will fail to start the cpu
+ * as we can not handle hetergenous PMUs with different snapshot
+ * capability.
+ */
+ static_branch_enable(&sbi_pmu_snapshot_available);
+ }
+ }
+
register_sysctl("kernel", sbi_pmu_sysctl_table);
+ ret = cpuhp_state_add_instance(CPUHP_AP_PERF_RISCV_STARTING, &pmu->node);
+ if (ret)
+ goto out_unregister;
+
return 0;
out_unregister:
@@ -1135,6 +1386,9 @@ static int __init pmu_sbi_devinit(void)
return 0;
}
+ if (sbi_spec_version >= sbi_mk_version(2, 0))
+ sbi_v2_available = true;
+
ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_RISCV_STARTING,
"perf/riscv/pmu:starting",
pmu_sbi_starting_cpu, pmu_sbi_dying_cpu);
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 43282e22ebe1df..701974639ff2ca 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -39,6 +39,14 @@ struct cpu_hw_events {
DECLARE_BITMAP(used_hw_ctrs, RISCV_MAX_COUNTERS);
/* currently enabled firmware counters */
DECLARE_BITMAP(used_fw_ctrs, RISCV_MAX_COUNTERS);
+ /* The virtual address of the shared memory where counter snapshot will be taken */
+ void *snapshot_addr;
+ /* The physical address of the shared memory where counter snapshot will be taken */
+ phys_addr_t snapshot_addr_phys;
+ /* Boolean flag to indicate setup is already done */
+ bool snapshot_set_done;
+ /* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
+ u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
};
struct riscv_pmu {
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d68d3247390c09..a832e11944bf63 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -193,6 +193,8 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index ce473fe251dde4..5f389166338c07 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
+}
+
+static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
+}
+
struct ex_regs {
unsigned long ra;
unsigned long sp;
@@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
-/* SBI return error codes */
-#define SBI_SUCCESS 0
-#define SBI_ERR_FAILURE -1
-#define SBI_ERR_NOT_SUPPORTED -2
-#define SBI_ERR_INVALID_PARAM -3
-#define SBI_ERR_DENIED -4
-#define SBI_ERR_INVALID_ADDRESS -5
-#define SBI_ERR_ALREADY_AVAILABLE -6
-#define SBI_ERR_ALREADY_STARTED -7
-#define SBI_ERR_ALREADY_STOPPED -8
-
-#define SBI_EXT_EXPERIMENTAL_START 0x08000000
-#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
-
-#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
-#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
-#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
-
-enum sbi_ext_id {
- SBI_EXT_BASE = 0x10,
- SBI_EXT_STA = 0x535441,
-};
-
-enum sbi_ext_base_fid {
- SBI_EXT_BASE_PROBE_EXT = 3,
-};
-
-struct sbiret {
- long error;
- long value;
-};
-
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
- unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4,
- unsigned long arg5);
-
-bool guest_sbi_probe_extension(int extid, long *out_val);
-
static inline void local_irq_enable(void)
{
csr_set(CSR_SSTATUS, SR_SIE);
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
new file mode 100644
index 00000000000000..046b432ae896f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V SBI specific definitions
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef SELFTEST_KVM_SBI_H
+#define SELFTEST_KVM_SBI_H
+
+/* SBI spec version fields */
+#define SBI_SPEC_VERSION_DEFAULT 0x1
+#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
+#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS 0
+#define SBI_ERR_FAILURE -1
+#define SBI_ERR_NOT_SUPPORTED -2
+#define SBI_ERR_INVALID_PARAM -3
+#define SBI_ERR_DENIED -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
+
+enum sbi_ext_id {
+ SBI_EXT_BASE = 0x10,
+ SBI_EXT_STA = 0x535441,
+ SBI_EXT_PMU = 0x504D55,
+};
+
+enum sbi_ext_base_fid {
+ SBI_EXT_BASE_GET_SPEC_VERSION = 0,
+ SBI_EXT_BASE_GET_IMP_ID,
+ SBI_EXT_BASE_GET_IMP_VERSION,
+ SBI_EXT_BASE_PROBE_EXT = 3,
+};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+};
+
+union sbi_pmu_ctr_info {
+ unsigned long value;
+ struct {
+ unsigned long csr:12;
+ unsigned long width:6;
+#if __riscv_xlen == 32
+ unsigned long reserved:13;
+#else
+ unsigned long reserved:45;
+#endif
+ unsigned long type:1;
+ };
+};
+
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/* SBI PMU counter types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+bool guest_sbi_probe_extension(int extid, long *out_val);
+
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
+}
+
+unsigned long get_host_sbi_spec_version(void);
+
+#endif /* SELFTEST_KVM_SBI_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
index be46eb32ec2773..a695ae36f3e0de 100644
--- a/tools/testing/selftests/kvm/include/riscv/ucall.h
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -3,6 +3,7 @@
#define SELFTEST_KVM_UCALL_H
#include "processor.h"
+#include "sbi.h"
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index e8211f5d68637e..ccb35573749c3d 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -502,3 +502,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val)
return true;
}
+
+unsigned long get_host_sbi_spec_version(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
+ 0, 0, 0, 0, 0);
+
+ GUEST_ASSERT(!ret.error);
+
+ return ret.value;
+}
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 0f9cabd99fd451..735b7856902187 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -85,7 +85,7 @@ struct kvm_vm *test_vm_create(void)
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
- __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
"SSTC not available, skipping test\n");
vm_init_vector_tables(vm);
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
new file mode 100644
index 00000000000000..823c132069b46a
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V KVM ebreak test.
+ *
+ * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ */
+#include "kvm_util.h"
+
+#define LABEL_ADDRESS(v) ((uint64_t)&(v))
+
+extern unsigned char sw_bp_1, sw_bp_2;
+static uint64_t sw_bp_addr;
+
+static void guest_code(void)
+{
+ asm volatile(
+ ".option push\n"
+ ".option norvc\n"
+ "sw_bp_1: ebreak\n"
+ "sw_bp_2: ebreak\n"
+ ".option pop\n"
+ );
+ GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
+
+ GUEST_DONE();
+}
+
+static void guest_breakpoint_handler(struct ex_regs *regs)
+{
+ WRITE_ONCE(sw_bp_addr, regs->epc);
+ regs->epc += 4;
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t pc;
+ struct kvm_guest_debug debug = {
+ .control = KVM_GUESTDBG_ENABLE,
+ };
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_vector_tables(vm);
+ vcpu_init_vector_tables(vcpu);
+ vm_install_exception_handler(vm, EXC_BREAKPOINT,
+ guest_breakpoint_handler);
+
+ /*
+ * Enable the guest debug.
+ * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
+ */
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
+ TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
+
+ /* skip sw_bp_1 */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
+
+ /*
+ * Disable all debug controls.
+ * Guest should handle the ebreak without exiting to the VMM.
+ */
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index b882b7b9b78506..222198dd6d0450 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -43,6 +43,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
@@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(V),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
+KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
@@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_d,
&config_h,
&config_smstateen,
+ &config_sscofpmf,
&config_sstc,
&config_svinval,
&config_svnapot,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
new file mode 100644
index 00000000000000..69bb94e6b22762
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
+ *
+ * Copyright (c) 2024, Rivos Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+#include "sbi.h"
+#include "arch_timer.h"
+
+/* Maximum counters(firmware + hardware) */
+#define RISCV_MAX_PMU_COUNTERS 64
+union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
+
+/* Snapshot shared memory data */
+#define PMU_SNAPSHOT_GPA_BASE BIT(30)
+static void *snapshot_gva;
+static vm_paddr_t snapshot_gpa;
+
+static int vcpu_shared_irq_count;
+static int counter_in_use;
+
+/* Cache the available counters in a bitmask */
+static unsigned long counter_mask_available;
+
+static bool illegal_handler_invoked;
+
+#define SBI_PMU_TEST_BASIC BIT(0)
+#define SBI_PMU_TEST_EVENTS BIT(1)
+#define SBI_PMU_TEST_SNAPSHOT BIT(2)
+#define SBI_PMU_TEST_OVERFLOW BIT(3)
+
+static int disabled_tests;
+
+unsigned long pmu_csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ switchcase_csr_read_32(CSR_CYCLEH, ret)
+ default :
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static inline void dummy_func_loop(uint64_t iter)
+{
+ int i = 0;
+
+ while (i < iter) {
+ asm volatile("nop");
+ i++;
+ }
+}
+
+static void start_counter(unsigned long counter, unsigned long start_flags,
+ unsigned long ival)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
+ ival, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
+}
+
+/* This should be invoked only for reset counter use case */
+static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
+ stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld\n", counter);
+}
+
+static void stop_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
+ 0, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
+ counter, ret.error);
+}
+
+static void guest_illegal_exception_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
+ "Unexpected exception handler %lx\n", regs->cause);
+
+ illegal_handler_invoked = true;
+ /* skip the trapping instruction */
+ regs->epc += 4;
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ unsigned long overflown_mask;
+ unsigned long counter_val = 0;
+
+ /* Validate that we are in the correct irq handler */
+ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
+
+ /* Stop all counters first to avoid further interrupts */
+ stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
+
+ overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
+ GUEST_ASSERT(overflown_mask & 0x01);
+
+ WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
+
+ counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* Now start the counter to mimick the real driver behavior */
+ start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
+}
+
+static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
+ unsigned long cflags,
+ unsigned long event)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
+ cflags, event, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
+ GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
+ GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
+
+ return ret.value;
+}
+
+static unsigned long get_num_counters(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
+ __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
+ "Invalid number of counters %ld\n", ret.value);
+
+ return ret.value;
+}
+
+static void update_counter_info(int num_counters)
+{
+ int i = 0;
+ struct sbiret ret;
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo_arr[i].value = ret.value;
+ counter_mask_available |= BIT(i);
+ }
+
+ GUEST_ASSERT(counter_mask_available > 0);
+}
+
+static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == 0);
+ return ret.value;
+}
+
+static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ unsigned long counter_val = 0;
+
+ __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
+
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
+ counter_val = pmu_csr_read_num(ctrinfo.csr);
+ else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
+ counter_val = read_fw_counter(idx, ctrinfo);
+
+ return counter_val;
+}
+
+static inline void verify_sbi_requirement_assert(void)
+{
+ long out_val = 0;
+ bool probe;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
+ __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
+}
+
+static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
+#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void test_pmu_event(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
+ stop_counter(counter, 0);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ stop_counter(counter, 0);
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_snapshot(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ /* The counter value is updated w.r.t relative index of cbase */
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
+
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_overflow(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_post;
+ unsigned long counter_init_value = ULONG_MAX - 10000;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_in_use = counter;
+
+ /* The counter value is updated w.r.t relative index of cbase passed to start/stop */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ udelay(msecs_to_usecs(2000));
+ /* irq handler should have stopped the counter */
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* The counter value after stopping should be less the init value due to overflow */
+ __GUEST_ASSERT(counter_value_post < counter_init_value,
+ "counter_value_post %lx counter_init_value %lx for counter\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_invalid_event(void)
+{
+ struct sbiret ret;
+ unsigned long event = 0x1234; /* A random event */
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
+ counter_mask_available, 0, event, 0, 0);
+ GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
+}
+
+static void test_pmu_events(void)
+{
+ int num_counters = 0;
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Sanity testing for any random invalid event */
+ test_invalid_event();
+
+ /* Only these two events are guaranteed to be present */
+ test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_basic_sanity(void)
+{
+ long out_val = 0;
+ bool probe;
+ struct sbiret ret;
+ int num_counters = 0, i;
+ union sbi_pmu_ctr_info ctrinfo;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ num_counters = get_num_counters();
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
+ 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo.value = ret.value;
+
+ /**
+ * Accessibility check of hardware and read capability of firmware counters.
+ * The spec doesn't mandate any initial value. No need to check any value.
+ */
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
+ pmu_csr_read_num(ctrinfo.csr);
+ GUEST_ASSERT(illegal_handler_invoked);
+ } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
+ read_fw_counter(i, ctrinfo);
+ }
+ }
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_snaphost(void)
+{
+ int num_counters = 0;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ int i;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Validate shared memory access */
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
+ for (i = 0; i < num_counters; i++) {
+ if (counter_mask_available & (BIT(i)))
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
+ }
+ /* Only these two events are guranteed to be present */
+ test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_overflow(void)
+{
+ int num_counters = 0;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+ csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
+ local_irq_enable();
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /*
+ * Qemu supports overflow for cycle/instruction.
+ * This test may fail on any platform that do not support overflow for these two events.
+ */
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ case UCALL_SYNC:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+void test_vm_destroy(struct kvm_vm *vm)
+{
+ memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
+ counter_mask_available = 0;
+ kvm_vm_free(vm);
+}
+
+static void test_vm_basic_test(void *guest_code)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ vm_init_vector_tables(vm);
+ /* Illegal instruction handler is required to verify read access without configuration */
+ vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu = NULL;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ /* PMU Snapshot requires single page only */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
+ /* PMU_SNAPSHOT_GPA_BASE is identity mapped */
+ virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
+
+ snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
+ snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gpa);
+}
+
+static void test_vm_events_snapshot_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_overflow(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
+ "Sscofpmf is not available, skipping overflow test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ /* Initialize guest timer frequency. */
+ vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
+ sync_global_to_guest(vm, timer_freq);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-d <test name>]\n", name);
+ pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ switch (opt) {
+ case 'd':
+ if (!strncmp("basic", optarg, 5))
+ disabled_tests |= SBI_PMU_TEST_BASIC;
+ else if (!strncmp("events", optarg, 6))
+ disabled_tests |= SBI_PMU_TEST_EVENTS;
+ else if (!strncmp("snapshot", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ else if (!strncmp("overflow", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ else
+ goto done;
+ break;
+ case 'h':
+ default:
+ goto done;
+ }
+ }
+
+ return true;
+done:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ test_vm_basic_test(test_pmu_basic_sanity);
+ pr_info("SBI PMU basic test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ test_vm_events_test(test_pmu_events);
+ pr_info("SBI PMU event verification test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ test_vm_events_snapshot_test(test_pmu_events_snaphost);
+ pr_info("SBI PMU event verification with snapshot test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ test_vm_events_overflow(test_pmu_events_overflow);
+ pr_info("SBI PMU event verification with overflow test : PASS\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index bae0c5026f82f6..2ff82c7fd92659 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -11,7 +11,9 @@
#include <pthread.h>
#include <linux/kernel.h>
#include <asm/kvm.h>
-#ifndef __riscv
+#ifdef __riscv
+#include "sbi.h"
+#else
#include <asm/kvm_para.h>
#endif