author		Andy Lutomirski <luto@kernel.org>	2015-04-19 15:05:10 -0700
committer	Andy Lutomirski <luto@kernel.org>	2015-04-19 15:53:01 -0700
commit		21ebbf09a504143cc9fe91a466311488e90ff311 (patch)
tree		0e0361ae53fe08d42a00eafe993c574789f0e9b2
parent		b4c19321394d0de16282ea0a02f53af0643ee25c (diff)
perf_self_monitor: Add work-in-progress cycle counter
Signed-off-by: Andy Lutomirski <luto@kernel.org>
-rw-r--r--	tight_loop/perf_self_monitor.c	163
1 file changed, 163 insertions(+), 0 deletions(-)
diff --git a/tight_loop/perf_self_monitor.c b/tight_loop/perf_self_monitor.c
new file mode 100644
index 0000000..52d338b
--- /dev/null
+++ b/tight_loop/perf_self_monitor.c
@@ -0,0 +1,163 @@
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <err.h>
+
+struct psm_counter {
+ int fd;
+ struct perf_event_mmap_page *metadata;
+};
+
+struct psm_counter *psm_counter_create(void)
+{
+	struct psm_counter *counter = malloc(sizeof(struct psm_counter));
+	if (!counter)
+		err(1, "malloc");
+
+ struct perf_event_attr attr;
+ memset(&attr, 0, sizeof(attr));
+
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.size = sizeof(struct perf_event_attr);
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+
+ counter->fd = syscall(
+ SYS_perf_event_open,
+ &attr, /* attributes */
+ 0, /* monitor me */
+		-1,			/* on any CPU */
+ -1, /* group leader */
+ PERF_FLAG_FD_CLOEXEC /* flags */
+ );
+
+ if (counter->fd == -1)
+ err(1, "perf_event_open");
+
+ counter->metadata = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
+ counter->fd, 0);
+ if (counter->metadata == MAP_FAILED) {
+ err(1, "mmap");
+ }
+
+	if (!counter->metadata->cap_user_rdpmc)
+		errx(1, "RDPMC not supported (cap_user_rdpmc == 0)");
+
+	if (!counter->metadata->index)
+		errx(1, "RDPMC not supported (no index assigned)");
+
+ return counter;
+}
+
+void psm_counter_destroy(struct psm_counter *counter)
+{
+	munmap(counter->metadata, PAGE_SIZE);
+	counter->metadata = MAP_FAILED;
+	close(counter->fd);
+	counter->fd = -1;
+	free(counter);
+}
+
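+/*
+ * Compiler barrier: prevents the compiler from reordering or caching memory
+ * accesses across this point.  It emits no instructions.
+ */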
+static inline void psm_barrier(void)
+{
+ asm volatile ("" : : : "memory");
+}
+
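+/*
+ * CPUID is a serializing instruction: everything issued before it must
+ * retire before execution continues, which keeps earlier work out of the
+ * measured interval.
+ */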
+static inline void psm_serialize(void)
+{
+ unsigned int eax = 0, ecx = 0;
+ asm volatile ("cpuid"
+ : "+a" (eax), "+c" (ecx) : : "ebx", "edx", "memory");
+}
+
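+/*
+ * Read a hardware performance counter directly from user space; ecx selects
+ * the counter and the value is returned in edx:eax.
+ */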
+static inline uint64_t psm_rdpmc(unsigned int ecx)
+{
+ unsigned int eax, edx;
+ asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
+ return (((uint64_t)edx << 32) | eax);
+}
+
+/*
+ * psm_atomic is a mechanism for very precise counter measurement of
+ * very short operations. A psm_atomic interval will fail if the
+ * process is preempted during the measurement interval.
+ */
+
+struct psm_atomic {
+ /* Copied from metadata in psm_atomic_start. */
+ uint32_t perf_lock; /* If odd, this sample is bad. */
+ uint64_t perf_time_offset;
+
+ uint64_t initial_raw_count;
+
+	/*
+	 * This is cached here to improve code generation: a struct
+	 * psm_atomic is unlikely to escape, so it is not forced back to
+	 * memory by a "memory" clobber.
+	 */
+ unsigned int rdpmc_ecx;
+};
+
+/*
+ * Starts measuring an uninterrupted duration.
+ */
+static inline struct psm_atomic psm_atomic_start(const struct psm_counter *ctr)
+{
+ struct psm_atomic state;
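+	/*
+	 * Snapshot the metadata seqlock and time_offset; if either changes
+	 * before the measurement ends, psm_atomic_elapsed() rejects the
+	 * sample.
+	 */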
+ state.perf_lock = ctr->metadata->lock;
+ psm_barrier();
+ state.perf_time_offset = ctr->metadata->time_offset;
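+	/* metadata->index is 1-based; rdpmc takes the 0-based counter number. */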
+ state.rdpmc_ecx = ctr->metadata->index - 1;
+ psm_barrier(); /* Do rdpmc last to reduce noise */
+ state.initial_raw_count = psm_rdpmc(state.rdpmc_ecx);
+ return state;
+}
+
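+/*
+ * Finishes a measurement started by psm_atomic_start().  Returns true and
+ * stores the elapsed count in *elapsed_count if the interval was clean;
+ * returns false if the sample was disturbed and must be discarded.
+ */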
+static inline bool psm_atomic_elapsed(uint64_t *elapsed_count,
+ const struct psm_atomic *state,
+ const struct psm_counter *ctr)
+{
+ /* Do the RDPMC first to reduce noise. */
+ uint64_t count_now = psm_rdpmc(state->rdpmc_ecx);
+ psm_barrier(); /* No, really, do it first. */
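+	/*
+	 * The hardware counter is only pmc_width bits wide.  Shift both
+	 * samples up to bit 63 so the subtraction wraps correctly, then
+	 * shift the difference back down.
+	 */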
+ unsigned int shift = 64 - ctr->metadata->pmc_width;
+ count_now <<= shift;
+ uint64_t initial_count = state->initial_raw_count;
+ initial_count <<= shift;
+ *elapsed_count = (count_now - initial_count) >> shift;
+ psm_barrier();
+
+ if (ctr->metadata->time_offset != state->perf_time_offset)
+ return false; /* We were interrupted. */
+
+ /* Now check the lock. */
+ psm_barrier();
+ if (ctr->metadata->lock != state->perf_lock || (state->perf_lock & 1))
+ return false;
+
+ return true;
+}
+
+int main()
+{
+ struct psm_counter *ctr = psm_counter_create();
+
+ for (int i = 0; i < 20; i++) {
+ struct psm_atomic duration = psm_atomic_start(ctr);
+ unsigned long rax;
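+		/*
+		 * 0xbfffffff is not a valid syscall number, so the kernel
+		 * returns -ENOSYS almost immediately; each iteration times
+		 * two bare syscall round trips.
+		 */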
+ rax = 0xbfffffff;
+ asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
+ rax = 0xbfffffff;
+ asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
+ uint64_t cycles;
+ bool ok = psm_atomic_elapsed(&cycles, &duration, ctr);
+ if (ok)
+			printf("%d: %llu\n", i, (unsigned long long)cycles);
+ else
+ printf("sadness\n");
+ }
+
+ psm_counter_destroy(ctr);
+}