diff options
author | Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | 2024-02-29 16:30:54 -0500 |
---|---|---|
committer | Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | 2024-02-29 16:31:37 -0500 |
commit | 74b9c6b3bc832839c418566443cf5bee1929a90e (patch) | |
tree | 42128c52c6c1b9e675969daf9218bfa5eed44c37 | |
parent | 81477d235ad85490add35001c74497ba8d78a1ac (diff) | |
download | librseq-rseq-percpu-alloc.tar.gz |
rseq percpu alloc: Add percpu pool set [rseq-percpu-alloc]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I1f17de5a7ad735471b9d3fbe42a78f49a795fea8
-rw-r--r-- | include/rseq/percpu-alloc.h | 12 | ||||
-rw-r--r-- | src/rseq-percpu-alloc.c | 258 |
2 files changed, 259 insertions, 11 deletions
diff --git a/include/rseq/percpu-alloc.h b/include/rseq/percpu-alloc.h
index 862778e..7335ff3 100644
--- a/include/rseq/percpu-alloc.h
+++ b/include/rseq/percpu-alloc.h
@@ -12,10 +12,10 @@
 
 struct rseq_percpu_pool;
 
-struct rseq_percpu_pool *rseq_percpu_poll_create(size_t item_len,
+struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
 		size_t percpu_len, int max_nr_cpus,
 		int prot, int flags, int fd, off_t offset);
-int rseq_percpu_poll_destroy(struct rseq_percpu_pool *pool);
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool);
 
 void *rseq_percpu_malloc(struct rseq_percpu_pool *pool);
 void *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool);
@@ -25,4 +25,12 @@ void *__rseq_percpu_ptr(void *ptr, int cpu);
 
 #define rseq_percpu_ptr(ptr, cpu)	((__typeof__(ptr)) __rseq_percpu_ptr(ptr, cpu))
 
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void);
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set);
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set,
+		struct rseq_percpu_pool *pool);
+
+void *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+void *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+
 #endif /* _RSEQ_PERCPU_ALLOC_H */
diff --git a/src/rseq-percpu-alloc.c b/src/rseq-percpu-alloc.c
index c9674a9..c37a720 100644
--- a/src/rseq-percpu-alloc.c
+++ b/src/rseq-percpu-alloc.c
@@ -6,6 +6,10 @@
 #include <assert.h>
 #include <string.h>
 #include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <rseq/compiler.h>
+
 
 /*
  * rseq-percpu-alloc.c: rseq per-cpu memory allocator.
@@ -15,11 +19,26 @@
  * per-cpu, for a given maximum number of CPUs.
  */
 
-/* Use lowest byte of per-cpu addresses to index the pool. */
-#define OFFSET_SHIFT	8
+/* Use lowest bits of per-cpu addresses to index the pool. */
+#if RSEQ_BITS_PER_LONG == 64
+# define OFFSET_SHIFT	16
+#else
+# define OFFSET_SHIFT	8
+#endif
 #define MAX_NR_POOLS	(1U << OFFSET_SHIFT)
 #define POOL_MASK	(MAX_NR_POOLS - 1)
 
+#define POOL_SET_NR_ENTRIES	(RSEQ_BITS_PER_LONG - OFFSET_SHIFT)
+#define POOL_SET_MAX_ALLOC_LEN	(1UL << POOL_SET_NR_ENTRIES)
+
+#if RSEQ_BITS_PER_LONG == 64
+# define POOL_SET_MIN_ENTRY	3	/* Smallest item_len=8 */
+#else
+# define POOL_SET_MIN_ENTRY	2	/* Smallest item_len=4 */
+#endif
+
+#define DEFAULT_PAGE_SIZE	4096
+
 struct free_list_node;
 
 struct free_list_node {
@@ -34,6 +53,7 @@ struct rseq_percpu_pool {
 	unsigned int index;
 	size_t item_len;
 	size_t percpu_len;
+	int item_order;
 	int max_nr_cpus;
 
 	/*
@@ -49,20 +69,142 @@ struct rseq_percpu_pool {
 	pthread_mutex_t lock;
 };
 
+//TODO: the array of pools should grow dynamically on create.
 static struct rseq_percpu_pool[MAX_NR_POOLS];
 
-struct rseq_percpu_pool *rseq_percpu_poll_create(size_t item_len,
+/*
+ * Pool set entries are indexed by item_len rounded to the next power of
+ * 2. A pool set can contain NULL pool entries, in which case the next
+ * large enough entry will be used for allocation.
+ */
+struct rseq_percpu_pool_set {
+	/* This lock protects add vs malloc/zmalloc within the pool set. */
+	pthread_mutex_t lock;
+	struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
+};
+
+#define __rseq_align_mask(v, mask)	(((v) + (mask)) & ~(mask))
+#define rseq_align(v, align)		__rseq_align_mask(v, (__typeof__(v)) (align) - 1)
+
+static __attribute__((unused))
+unsigned int fls_u64(uint64_t x)
+{
+	unsigned int r = 64;
+
+	if (!x)
+		return 0;
+
+	if (!(x & 0xFFFFFFFF00000000ULL)) {
+		x <<= 32;
+		r -= 32;
+	}
+	if (!(x & 0xFFFF000000000000ULL)) {
+		x <<= 16;
+		r -= 16;
+	}
+	if (!(x & 0xFF00000000000000ULL)) {
+		x <<= 8;
+		r -= 8;
+	}
+	if (!(x & 0xF000000000000000ULL)) {
+		x <<= 4;
+		r -= 4;
+	}
+	if (!(x & 0xC000000000000000ULL)) {
+		x <<= 2;
+		r -= 2;
+	}
+	if (!(x & 0x8000000000000000ULL)) {
+		x <<= 1;
+		r -= 1;
+	}
+	return r;
+}
+
+static __attribute__((unused))
+unsigned int fls_u32(uint32_t x)
+{
+	unsigned int r = 32;
+
+	if (!x)
+		return 0;
+	if (!(x & 0xFFFF0000U)) {
+		x <<= 16;
+		r -= 16;
+	}
+	if (!(x & 0xFF000000U)) {
+		x <<= 8;
+		r -= 8;
+	}
+	if (!(x & 0xF0000000U)) {
+		x <<= 4;
+		r -= 4;
+	}
+	if (!(x & 0xC0000000U)) {
+		x <<= 2;
+		r -= 2;
+	}
+	if (!(x & 0x80000000U)) {
+		x <<= 1;
+		r -= 1;
+	}
+	return r;
+}
+
+static
+unsigned int fls_ulong(unsigned long x)
+{
+#if RSEQ_BITS_PER_LONG == 32
+	return fls_u32(x);
+#else
+	return fls_u64(x);
+#endif
+}
+
+/*
+ * Return the minimum order for which x <= (1UL << order).
+ * Return -1 if x is 0.
+ */
+static
+int get_count_order_ulong(unsigned long x)
+{
+	if (!x)
+		return -1;
+
+	return fls_ulong(x - 1);
+}
+
+struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
 		size_t percpu_len, int max_nr_cpus,
 		int prot, int flags, int fd, off_t offset)
 {
 	struct rseq_percpu_pool *pool;
 	void *base;
-	int i;
+	int i, order;
+	long page_len;
+
+	/* Make sure each item is large enough to contain free list pointers. */
+	if (item_len < sizeof(void *))
+		item_len = sizeof(void *);
+
+	/* Align item_len on next power of two. */
+	order = get_count_order_ulong(item_len);
+	if (order < 0) {
+		errno = EINVAL;
+		return NULL;
+	}
+	item_len = 1UL << order;
+
+	/* Align percpu_len on page size. */
+	page_len = sysconf(_SC_PAGE_SIZE);
+	if (page_len < 0)
+		page_len = DEFAULT_PAGE_SIZE;
+	percpu_len = rseq_align(percpu_len, page_len);
 
 	if (max_nr_cpus < 0 || item_len > percpu_len ||
 			percpu_len > (UINTPTR_MAX >> OFFSET_SHIFT)) {
 		errno = EINVAL;
-		return MAP_FAILED;
+		return NULL;
 	}
 
 	pthread_mutex_lock(&pool_lock);
@@ -73,13 +215,13 @@ struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
 		goto found_empty;
 	}
 	errno = ENOMEM;
-	pool = MAP_FAILED;
+	pool = NULL;
 	goto end;
 
 found_empty:
 	base = mmap(NULL, percpu_len * max_nr_cpus, prot, flags, fd, offset);
 	if (base == MAP_FAILED) {
-		pool = MAP_FAILED;
+		pool = NULL;
 		goto end;
 	}
 	// TODO: integrate with libnuma to provide NUMA placement hints.
@@ -88,12 +230,14 @@ found_empty:
 	pool->base = base;
 	pool->percpu_len = percpu_len;
 	pool->index = i;
+	pool->item_len = item_len;
+	pool->item_order = order;
 end:
 	pthread_mutex_unlock(&pool_lock);
 	return pool;
 }
 
-int rseq_percpu_poll_destroy(struct rseq_percpu_pool *pool)
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
 {
 	int ret;
 
@@ -168,7 +312,7 @@ void *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed)
 end:
 	pthread_mutex_unlock(&pool->lock);
 	if (zeroed && addr)
-		rseq_percpu_zero_offset(pool, offset_offset);
+		rseq_percpu_zero_offset(pool, item_offset);
 	return addr;
 }
 
@@ -198,3 +342,99 @@ void rseq_percpu_free(void *ptr)
 	pool->free_list_head = item;
 	pthread_mutex_unlock(&pool->lock);
 }
+
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void)
+{
+	struct rseq_percpu_pool_set *pool_set;
+
+	pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set));
+	if (!pool_set)
+		return NULL;
+	pthread_mutex_init(&pool_set->lock, NULL);
+	return pool_set;
+}
+
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set)
+{
+	int order, ret;
+
+	for (order = 0; order < POOL_SET_NR_ENTRIES; order++) {
+		struct rseq_percpu_pool *pool = pool_set->entries[order];
+
+		if (!pool)
+			continue;
+		ret = rseq_percpu_pool_destroy(pool);
+		if (ret)
+			abort();
+	}
+	pthread_mutex_destroy(&pool_set->lock);
+	free(pool_set);
+	return 0;
+}
+
+/* Ownership of pool is handed over to pool set on success. */
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool)
+{
+	size_t item_order = pool->item_order;
+	int ret = 0;
+
+	pthread_mutex_lock(&pool_set->lock);
+	if (pool_set->entries[item_order]) {
+		errno = EBUSY;
+		ret = -1;
+		goto end;
+	}
+	pool_set->entries[item_order] = pool;
+end:
+	pthread_mutex_unlock(&pool_set->lock);
+	return ret;
+}
+
+static
+void *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
+{
+	struct rseq_percpu_pool *pool;
+	void *addr;
+	int order, min_order = POOL_SET_MIN_ENTRY;
+
+again:
+	pthread_mutex_lock(&pool_set->lock);
+	/* First smallest present pool where @len fits. */
+	for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
+		pool = pool_set->entries[order];
+
+		if (!pool)
+			continue;
+		if (pool->item_len >= len)
+			goto found;
+	}
+	pool = NULL;
+found:
+	pthread_mutex_unlock(&pool_set->lock);
+	if (pool) {
+		addr = __rseq_percpu_malloc(pool, zeroed);
+		if (addr == NULL && errno == ENOMEM) {
+			/*
+			 * If the allocation failed, try again with a
+			 * larger pool.
+			 */
+			min_order = order + 1;
+			goto again;
+		}
+	} else {
+		/* Not found. */
+		errno = ENOMEM;
+		addr = NULL;
+	}
+	return addr;
+}
+
+void *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+	return __rseq_percpu_pool_set_malloc(pool_set, len, false);
+}
+
+void *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+	return __rseq_percpu_pool_set_malloc(pool_set, len, true);
+}