aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-09-30 16:03:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-09-30 16:03:19 -0700
commitc816f2e9813d218b36343c67b443c77c539ea294 (patch)
tree68d6b4513d5d76a4e87137976e6cb5347616accf
parent920541bb0b9bf08d1455890694fbb2bbffb7a12b (diff)
parent8e8bf60a675473a034ecfcb7a842b98868ed74a3 (diff)
downloadlinux-sgx-c816f2e9813d218b36343c67b443c77c539ea294.tar.gz
Merge tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo: - Fail the 'perf test record' entry on error, fixing a regression where just setup stuff like allocating memory and not the actual things being tested failed. - Fixup disabling of -Wdeprecated-declarations for the python scripting engine, the previous attempt had a brown paper bag thinko. - Fix branch stack sampling test to include sanity check for branch filter on PowerPC. - Update is_ignored_symbol function to match the kernel ignored list, fixing running the 'perf test' entry that compares resolving symbols from kallsyms to resolving from vmlinux. - Augment the data source type with ARM's neoverse_spe list, the previous code was limited in its search resolving the data source. - Fix some clang 5 variable set but unused cases. - Get a perf cgroup more portably in BPF as the __builtin_preserve_enum_value builtin is not available in older versions of clang. In those cases we can forgo BPF's CO-RE (Compile Once, Run Everywhere). - More Fixes for Intel's hybrid CPU model. * tag 'perf-tools-fixes-for-v6.0-2022-09-29' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: perf build: Fixup disabling of -Wdeprecated-declarations for the python scripting engine perf tests mmap-basic: Remove unused variable to address clang 15 warning perf parse-events: Ignore clang 15 warning about variable set but unused in bison produced code perf tests record: Fail the test if the 'errs' counter is not zero perf test: Fix test case 87 ("perf record tests") for hybrid systems perf arm-spe: augment the data source type with neoverse_spe list perf tests vmlinux-kallsyms: Update is_ignored_symbol function to match the kernel ignored list perf tests powerpc: Fix branch stack sampling test to include sanity check for branch filter perf parse-events: Remove "not supported" hybrid cache events perf print-events: Fix "perf list" can not display the PMU prefix for some hybrid cache events perf tools: Get a perf cgroup more portably in BPF
-rw-r--r--tools/perf/tests/mmap-basic.c3
-rw-r--r--tools/perf/tests/perf-record.c2
-rwxr-xr-xtools/perf/tests/shell/record.sh2
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh3
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c5
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/arm-spe.c2
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c11
-rw-r--r--tools/perf/util/bpf_skel/off_cpu.bpf.c18
-rw-r--r--tools/perf/util/parse-events-hybrid.c21
-rw-r--r--tools/perf/util/parse-events.c39
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/print-events.c39
-rw-r--r--tools/perf/util/scripting-engines/Build2
14 files changed, 92 insertions, 58 deletions
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index dfb6173b2a827f..9e9a2b67de1998 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -114,8 +114,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
for (i = 0; i < nsyscalls; ++i)
for (j = 0; j < expected_nr_events[i]; ++j) {
- int foo = syscalls[i]();
- ++foo;
+ syscalls[i]();
}
md = &evlist->mmap[0];
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 6a001fcfed68e5..4952abe716f318 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -332,7 +332,7 @@ out_delete_evlist:
out:
if (err == -EACCES)
return TEST_SKIP;
- if (err < 0)
+ if (err < 0 || errs != 0)
return TEST_FAIL;
return TEST_OK;
}
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 00c7285ce1ac64..301f95427159dd 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -61,7 +61,7 @@ test_register_capture() {
echo "Register capture test [Skipped missing registers]"
return
fi
- if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
+ if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
-c 1000 --per-thread true 2> /dev/null \
| perf script -F ip,sym,iregs -i - 2> /dev/null \
| egrep -q "DI:"
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index c644f94a650021..ec801cffae6bcd 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -12,7 +12,8 @@ if ! [ -x "$(command -v cc)" ]; then
fi
# skip the test if the hardware doesn't support branch stack sampling
-perf record -b -o- -B true > /dev/null 2>&1 || exit 2
+# and if the architecture doesn't support filter types: any,save_type,u
+perf record -b -o- -B --branch-filter any,save_type,u true > /dev/null 2>&1 || exit 2
TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 4fd8d703ff19ee..8ab035b55875c7 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type)
/* Symbol names that begin with the following are ignored.*/
static const char * const ignored_prefixes[] = {
"$", /* local symbols for ARM, MIPS, etc. */
- ".LASANPC", /* s390 kasan local symbols */
+ ".L", /* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */
"__crc_", /* modversions */
"__efistub_", /* arm64 EFI stub namespace */
- "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */
+ "__kvm_nvhe_$", /* arm64 local symbols in non-VHE KVM namespace */
+ "__kvm_nvhe_.L", /* arm64 local symbols in non-VHE KVM namespace */
"__AArch64ADRPThunk_", /* arm64 lld */
"__ARMV5PILongThunk_", /* arm lld */
"__ARMV7PILongThunk_",
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 9dfae1bda9cc3e..485e1a3431652a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -269,7 +269,7 @@ CFLAGS_expr-flex.o += $(flex_flags)
bison_flags := -DYYENABLE_NLS=0
BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
ifeq ($(BISON_GE_35),1)
- bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
+ bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
else
bison_flags += -w
endif
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 22dcfe07e886f9..906476a839e1f5 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -498,7 +498,7 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
union perf_mem_data_src data_src = { 0 };
- bool is_neoverse = is_midr_in_range(midr, neoverse_spe);
+ bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
index c72f8ad96f7519..9aa8cdd93de4e3 100644
--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c
@@ -48,6 +48,7 @@ const volatile __u32 num_cpus = 1;
int enabled = 0;
int use_cgroup_v2 = 0;
+int perf_subsys_id = -1;
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
@@ -58,7 +59,15 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
int level;
int cnt;
- cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_event_cgrp_id], cgroup);
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+ cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
level = BPF_CORE_READ(cgrp, level);
for (cnt = 0; i < MAX_LEVELS; i++) {
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index c4ba2bcf179f44..38e3b287dbb225 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -94,6 +94,8 @@ const volatile bool has_prev_state = false;
const volatile bool needs_cgroup = false;
const volatile bool uses_cgroup_v1 = false;
+int perf_subsys_id = -1;
+
/*
* Old kernel used to call it task_struct->state and now it's '__state'.
* Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -119,11 +121,19 @@ static inline __u64 get_cgroup_id(struct task_struct *t)
{
struct cgroup *cgrp;
- if (uses_cgroup_v1)
- cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_event_cgrp_id], cgroup);
- else
- cgrp = BPF_CORE_READ(t, cgroups, dfl_cgrp);
+ if (!uses_cgroup_v1)
+ return BPF_CORE_READ(t, cgroups, dfl_cgrp, kn, id);
+
+ if (perf_subsys_id == -1) {
+#if __has_builtin(__builtin_preserve_enum_value)
+ perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
+ perf_event_cgrp_id);
+#else
+ perf_subsys_id = perf_event_cgrp_id;
+#endif
+ }
+ cgrp = BPF_CORE_READ(t, cgroups, subsys[perf_subsys_id], cgroup);
return BPF_CORE_READ(cgrp, kn, id);
}
diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c
index 284f8eabd3b9ab..7c9f9150bad502 100644
--- a/tools/perf/util/parse-events-hybrid.c
+++ b/tools/perf/util/parse-events-hybrid.c
@@ -33,7 +33,8 @@ static void config_hybrid_attr(struct perf_event_attr *attr,
* If the PMU type ID is 0, the PERF_TYPE_RAW will be applied.
*/
attr->type = type;
- attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
+ attr->config = (attr->config & PERF_HW_EVENT_MASK) |
+ ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT);
}
static int create_event_hybrid(__u32 config_type, int *idx,
@@ -48,13 +49,25 @@ static int create_event_hybrid(__u32 config_type, int *idx,
__u64 config = attr->config;
config_hybrid_attr(attr, config_type, pmu->type);
+
+ /*
+ * Some hybrid hardware cache events are only available on one CPU
+ * PMU. For example, the 'L1-dcache-load-misses' is only available
+ * on cpu_core, while the 'L1-icache-loads' is only available on
+ * cpu_atom. We need to remove "not supported" hybrid cache events.
+ */
+ if (attr->type == PERF_TYPE_HW_CACHE
+ && !is_event_supported(attr->type, attr->config))
+ return 0;
+
evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id,
pmu, config_terms);
- if (evsel)
+ if (evsel) {
evsel->pmu_name = strdup(pmu->name);
- else
+ if (!evsel->pmu_name)
+ return -ENOMEM;
+ } else
return -ENOMEM;
-
attr->type = type;
attr->config = config;
return 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f05e15acd33fe8..f3b2c2a87456bd 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -28,6 +28,7 @@
#include "util/parse-events-hybrid.h"
#include "util/pmu-hybrid.h"
#include "tracepoint.h"
+#include "thread_map.h"
#define MAX_NAME_LEN 100
@@ -157,6 +158,44 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
+bool is_event_supported(u8 type, u64 config)
+{
+ bool ret = true;
+ int open_return;
+ struct evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .disabled = 1,
+ };
+ struct perf_thread_map *tmap = thread_map__new_by_tid(0);
+
+ if (tmap == NULL)
+ return false;
+
+ evsel = evsel__new(&attr);
+ if (evsel) {
+ open_return = evsel__open(evsel, NULL, tmap);
+ ret = open_return >= 0;
+
+ if (open_return == -EACCES) {
+ /*
+ * This happens if the paranoid value
+ * /proc/sys/kernel/perf_event_paranoid is set to 2
+ * Re-run with exclude_kernel set; we don't do that
+ * by default as some ARM machines do not support it.
+ *
+ */
+ evsel->core.attr.exclude_kernel = 1;
+ ret = evsel__open(evsel, NULL, tmap) >= 0;
+ }
+ evsel__delete(evsel);
+ }
+
+ perf_thread_map__put(tmap);
+ return ret;
+}
+
const char *event_type(int type)
{
switch (type) {
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 7e6a601d9cd01b..07df7bb7b04200 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -19,6 +19,7 @@ struct option;
struct perf_pmu;
bool have_tracepoints(struct list_head *evlist);
+bool is_event_supported(u8 type, u64 config);
const char *event_type(int type);
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index ba1ab51346854d..c4d5d87fae2f6b 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -22,7 +22,6 @@
#include "probe-file.h"
#include "string2.h"
#include "strlist.h"
-#include "thread_map.h"
#include "tracepoint.h"
#include "pfm.h"
#include "pmu-hybrid.h"
@@ -239,44 +238,6 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
strlist__delete(sdtlist);
}
-static bool is_event_supported(u8 type, unsigned int config)
-{
- bool ret = true;
- int open_return;
- struct evsel *evsel;
- struct perf_event_attr attr = {
- .type = type,
- .config = config,
- .disabled = 1,
- };
- struct perf_thread_map *tmap = thread_map__new_by_tid(0);
-
- if (tmap == NULL)
- return false;
-
- evsel = evsel__new(&attr);
- if (evsel) {
- open_return = evsel__open(evsel, NULL, tmap);
- ret = open_return >= 0;
-
- if (open_return == -EACCES) {
- /*
- * This happens if the paranoid value
- * /proc/sys/kernel/perf_event_paranoid is set to 2
- * Re-run with exclude_kernel set; we don't do that
- * by default as some ARM machines do not support it.
- *
- */
- evsel->core.attr.exclude_kernel = 1;
- ret = evsel__open(evsel, NULL, tmap) >= 0;
- }
- evsel__delete(evsel);
- }
-
- perf_thread_map__put(tmap);
- return ret;
-}
-
int print_hwcache_events(const char *event_glob, bool name_only)
{
unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
index c92326c2233a45..0f5ba28339cf9a 100644
--- a/tools/perf/util/scripting-engines/Build
+++ b/tools/perf/util/scripting-engines/Build
@@ -3,4 +3,4 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
-CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-error=deprecated-declarations
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations