author		Andy Lutomirski <luto@kernel.org>	2015-04-19 15:05:10 -0700
committer	Andy Lutomirski <luto@kernel.org>	2015-04-19 15:53:01 -0700
commit		21ebbf09a504143cc9fe91a466311488e90ff311 (patch)
tree		0e0361ae53fe08d42a00eafe993c574789f0e9b2
parent		b4c19321394d0de16282ea0a02f53af0643ee25c (diff)
download	misc-tests-21ebbf09a504143cc9fe91a466311488e90ff311.tar.gz
perf_self_monitor: Add work-in-progress cycle counter
Signed-off-by: Andy Lutomirski <luto@kernel.org>
-rw-r--r--	tight_loop/perf_self_monitor.c	163
1 file changed, 163 insertions, 0 deletions
diff --git a/tight_loop/perf_self_monitor.c b/tight_loop/perf_self_monitor.c
new file mode 100644
index 0000000..52d338b
--- /dev/null
+++ b/tight_loop/perf_self_monitor.c
@@ -0,0 +1,163 @@
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <err.h>
+
+struct psm_counter {
+	int fd;
+	struct perf_event_mmap_page *metadata;
+};
+
+struct psm_counter *psm_counter_create(void)
+{
+	struct psm_counter *counter = malloc(sizeof(struct psm_counter));
+
+	struct perf_event_attr attr;
+	memset(&attr, 0, sizeof(attr));
+
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.size = sizeof(struct perf_event_attr);
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+
+	counter->fd = syscall(
+		SYS_perf_event_open,
+		&attr,			/* attributes */
+		0,			/* monitor me */
+		-1,			/* all CPUs */
+		-1,			/* group leader */
+		PERF_FLAG_FD_CLOEXEC	/* flags */
+		);
+
+	if (counter->fd == -1)
+		err(1, "perf_event_open");
+
+	counter->metadata = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
+				 counter->fd, 0);
+	if (counter->metadata == MAP_FAILED)
+		err(1, "mmap");
+
+	if (!counter->metadata->cap_user_rdpmc)
+		errx(1, "RDPMC not supported (cap_user_rdpmc == 0)");
+
+	if (!counter->metadata->index)
+		errx(1, "RDPMC not supported (no index assigned)");
+
+	return counter;
+}
+
+void psm_counter_destroy(struct psm_counter *counter)
+{
+	munmap(counter->metadata, PAGE_SIZE);
+	counter->metadata = MAP_FAILED;
+	close(counter->fd);
+	counter->fd = -1;
+	free(counter);
+}
+
+static inline void psm_barrier(void)
+{
+	asm volatile ("" : : : "memory");
+}
+
+static inline void psm_serialize(void)
+{
+	unsigned int eax = 0, ecx = 0;
+	asm volatile ("cpuid"
+		      : "+a" (eax), "+c" (ecx) : : "ebx", "edx", "memory");
+}
+
+static inline uint64_t psm_rdpmc(unsigned int ecx)
+{
+	unsigned int eax, edx;
+	asm volatile ("rdpmc" : "=a" (eax), "=d" (edx) : "c" (ecx));
+	return ((uint64_t)edx << 32) | eax;
+}
+
+/*
+ * psm_atomic is a mechanism for very precise counter measurement of
+ * very short operations.  A psm_atomic interval will fail if the
+ * process is preempted during the measurement interval.
+ */
+
+struct psm_atomic {
+	/* Copied from metadata in psm_atomic_start. */
+	uint32_t perf_lock;		/* If odd, this sample is bad. */
+	uint64_t perf_time_offset;
+
+	uint64_t initial_raw_count;
+
+	/*
+	 * This is here to improve code generation.  A struct psm_atomic
+	 * is unlikely to have its address escape, so it will not be
+	 * reloaded because of a "memory" clobber.
+	 */
+	unsigned int rdpmc_ecx;
+};
+
+/*
+ * Starts measuring an uninterrupted duration.
+ */
+static inline struct psm_atomic psm_atomic_start(const struct psm_counter *ctr)
+{
+	struct psm_atomic state;
+
+	state.perf_lock = ctr->metadata->lock;
+	psm_barrier();
+	state.perf_time_offset = ctr->metadata->time_offset;
+	state.rdpmc_ecx = ctr->metadata->index - 1;
+	psm_barrier();	/* Do the rdpmc last to reduce noise. */
+	state.initial_raw_count = psm_rdpmc(state.rdpmc_ecx);
+	return state;
+}
+
+static inline bool psm_atomic_elapsed(uint64_t *elapsed_count,
+				      const struct psm_atomic *state,
+				      const struct psm_counter *ctr)
+{
+	/* Do the RDPMC first to reduce noise. */
+	uint64_t count_now = psm_rdpmc(state->rdpmc_ecx);
+	psm_barrier();	/* No, really, do it first. */
+
+	/*
+	 * The PMC is only pmc_width bits wide.  Shift both samples up so
+	 * that the subtraction wraps correctly, then shift back down.
+	 */
+	unsigned int shift = 64 - ctr->metadata->pmc_width;
+	count_now <<= shift;
+	uint64_t initial_count = state->initial_raw_count;
+	initial_count <<= shift;
+	*elapsed_count = (count_now - initial_count) >> shift;
+	psm_barrier();
+
+	if (ctr->metadata->time_offset != state->perf_time_offset)
+		return false;	/* We were interrupted. */
+
+	/* Now check the lock. */
+	psm_barrier();
+	if (ctr->metadata->lock != state->perf_lock || (state->perf_lock & 1))
+		return false;
+
+	return true;
+}
+
+int main(void)
+{
+	struct psm_counter *ctr = psm_counter_create();
+
+	for (int i = 0; i < 20; i++) {
+		struct psm_atomic duration = psm_atomic_start(ctr);
+		unsigned long rax;
+
+		/* Bogus syscall number: syscall entry/exit with no real work. */
+		rax = 0xbfffffff;
+		asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
+		rax = 0xbfffffff;
+		asm volatile ("syscall" : "+a" (rax) : : "rcx", "r11");
+
+		uint64_t cycles;
+		bool ok = psm_atomic_elapsed(&cycles, &duration, ctr);
+		if (ok)
+			printf("%d: %llu\n", i, (unsigned long long)cycles);
+		else
+			printf("sadness\n");
+	}
+
+	psm_counter_destroy(ctr);
+	return 0;
+}
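
For context, here is how this API is meant to be consumed: since psm_atomic_elapsed() reports failure whenever the process is interrupted mid-interval, a caller normally retries until it gets a clean sample. A minimal sketch, not part of this commit (the measure_cycles() name is hypothetical):

static uint64_t measure_cycles(const struct psm_counter *ctr)
{
	uint64_t cycles;
	struct psm_atomic a;

	/* Retry until a sample completes without preemption. */
	do {
		a = psm_atomic_start(ctr);
		/* ... code under measurement goes here ... */
	} while (!psm_atomic_elapsed(&cycles, &a, ctr));

	return cycles;
}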
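
The shift arithmetic in psm_atomic_elapsed() exists because the hardware counter is only pmc_width bits wide (48 on many Intel parts): shifting both samples into the top bits lets two's-complement subtraction absorb a counter wrap. A standalone illustration, assuming a 48-bit counter:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int shift = 64 - 48;			/* pretend pmc_width == 48 */
	uint64_t before = 0x0000fffffffffff0ull;	/* near the 48-bit limit */
	uint64_t after  = 0x0000000000000010ull;	/* counter wrapped past zero */

	/* Same computation as psm_atomic_elapsed(). */
	uint64_t elapsed = ((after << shift) - (before << shift)) >> shift;

	printf("elapsed = %llu\n", (unsigned long long)elapsed);	/* prints 32 */
	return 0;
}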