author     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>  2024-02-29 16:30:54 -0500
committer  Mathieu Desnoyers <mathieu.desnoyers@efficios.com>  2024-02-29 16:31:37 -0500
commit     74b9c6b3bc832839c418566443cf5bee1929a90e
tree       42128c52c6c1b9e675969daf9218bfa5eed44c37
parent     81477d235ad85490add35001c74497ba8d78a1ac
rseq percpu alloc: Add percpu pool set
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Change-Id: I1f17de5a7ad735471b9d3fbe42a78f49a795fea8
-rw-r--r--  include/rseq/percpu-alloc.h |  12
-rw-r--r--  src/rseq-percpu-alloc.c     | 258
2 files changed, 259 insertions(+), 11 deletions(-)
diff --git a/include/rseq/percpu-alloc.h b/include/rseq/percpu-alloc.h
index 862778e..7335ff3 100644
--- a/include/rseq/percpu-alloc.h
+++ b/include/rseq/percpu-alloc.h
@@ -12,10 +12,10 @@
struct rseq_percpu_pool;
-struct rseq_percpu_pool *rseq_percpu_poll_create(size_t item_len,
+struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
size_t percpu_len, int max_nr_cpus,
int prot, int flags, int fd, off_t offset);
-int rseq_percpu_poll_destroy(struct rseq_percpu_pool *pool);
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool);
void *rseq_percpu_malloc(struct rseq_percpu_pool *pool);
void *rseq_percpu_zmalloc(struct rseq_percpu_pool *pool);
@@ -25,4 +25,12 @@ void *__rseq_percpu_ptr(void *ptr, int cpu);
#define rseq_percpu_ptr(ptr, cpu) ((__typeof__(ptr)) __rseq_percpu_ptr(ptr, cpu))
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void);
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set);
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set,
+ struct rseq_percpu_pool *pool);
+
+void *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+void *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len);
+
#endif /* _RSEQ_PERCPU_ALLOC_H */
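
For context, a minimal usage sketch of the per-cpu pool API declared above. This is not part of the patch: the item type, the 64 kB per-CPU length, the 64-CPU limit and the anonymous private mapping flags are illustrative assumptions, and <sys/mman.h> plus <rseq/percpu-alloc.h> are assumed to be included.

	struct item { int counter; };

	struct rseq_percpu_pool *pool;
	struct item *p;
	int cpu, max_nr_cpus = 64;

	/* Pool of struct item, 64 kB per CPU, backed by an anonymous private mapping. */
	pool = rseq_percpu_pool_create(sizeof(struct item), 1UL << 16, max_nr_cpus,
			PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (!pool)
		abort();
	p = (struct item *) rseq_percpu_zmalloc(pool);
	if (!p)
		abort();
	/* The returned pointer is dereferenced through rseq_percpu_ptr() for a given CPU. */
	for (cpu = 0; cpu < max_nr_cpus; cpu++)
		rseq_percpu_ptr(p, cpu)->counter++;
	rseq_percpu_free(p);
	if (rseq_percpu_pool_destroy(pool))
		abort();
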
diff --git a/src/rseq-percpu-alloc.c b/src/rseq-percpu-alloc.c
index c9674a9..c37a720 100644
--- a/src/rseq-percpu-alloc.c
+++ b/src/rseq-percpu-alloc.c
@@ -6,6 +6,10 @@
#include <assert.h>
#include <string.h>
#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <rseq/compiler.h>
+
/*
* rseq-percpu-alloc.c: rseq per-cpu memory allocator.
@@ -15,11 +19,26 @@
* per-cpu, for a given maximum number of CPUs.
*/
-/* Use lowest byte of per-cpu addresses to index the pool. */
-#define OFFSET_SHIFT 8
+/* Use lowest bits of per-cpu addresses to index the pool. */
+#if RSEQ_BITS_PER_LONG == 64
+# define OFFSET_SHIFT 16
+#else
+# define OFFSET_SHIFT 8
+#endif
#define MAX_NR_POOLS (1U << OFFSET_SHIFT)
#define POOL_MASK (MAX_NR_POOLS - 1)
+#define POOL_SET_NR_ENTRIES (RSEQ_BITS_PER_LONG - OFFSET_SHIFT)
+#define POOL_SET_MAX_ALLOC_LEN (1UL << POOL_SET_NR_ENTRIES)
+
+#if RSEQ_BITS_PER_LONG == 64
+# define POOL_SET_MIN_ENTRY 3 /* Smallest item_len=8 */
+#else
+# define POOL_SET_MIN_ENTRY 2 /* Smallest item_len=4 */
+#endif
+
+#define DEFAULT_PAGE_SIZE 4096
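/*
 * Editorial note (not part of this patch): per-cpu pointers returned by
 * the allocator appear to encode their owning pool index in the low
 * OFFSET_SHIFT bits, which is why pool creation rejects percpu_len
 * larger than UINTPTR_MAX >> OFFSET_SHIFT. With this change, a 64-bit
 * build reserves 16 bits, i.e. up to 65536 pools
 * (MAX_NR_POOLS == 1U << 16), while a 32-bit build keeps 8 bits
 * (256 pools).
 */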
+
struct free_list_node;
struct free_list_node {
@@ -34,6 +53,7 @@ struct rseq_percpu_pool {
unsigned int index;
size_t item_len;
size_t percpu_len;
+ int item_order;
int max_nr_cpus;
/*
@@ -49,20 +69,142 @@ struct rseq_percpu_pool {
pthread_mutex_t lock;
};
+//TODO: the array of pools should grow dynamically on create.
static struct rseq_percpu_pool rseq_percpu_pool[MAX_NR_POOLS];
-struct rseq_percpu_pool *rseq_percpu_poll_create(size_t item_len,
+/*
+ * Pool set entries are indexed by item_len rounded to the next power of
+ * 2. A pool set can contain NULL pool entries, in which case the next
+ * large enough entry will be used for allocation.
+ */
+struct rseq_percpu_pool_set {
+ /* This lock protects add vs malloc/zmalloc within the pool set. */
+ pthread_mutex_t lock;
+ struct rseq_percpu_pool *entries[POOL_SET_NR_ENTRIES];
+};
+
+#define __rseq_align_mask(v, mask) (((v) + (mask)) & ~(mask))
+#define rseq_align(v, align) __rseq_align_mask(v, (__typeof__(v)) (align) - 1)
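/*
 * Editorial note (not part of this patch): rseq_align() rounds a value
 * up to the next multiple of a power-of-two alignment. For example,
 * with a 4096-byte page size:
 *
 *   rseq_align(5000, 4096) == 8192
 *   rseq_align(4096, 4096) == 4096
 *
 * since __rseq_align_mask(5000, 4095) == (5000 + 4095) & ~4095 == 8192.
 */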
+
+static __attribute__((unused))
+unsigned int fls_u64(uint64_t x)
+{
+ unsigned int r = 64;
+
+ if (!x)
+ return 0;
+
+ if (!(x & 0xFFFFFFFF00000000ULL)) {
+ x <<= 32;
+ r -= 32;
+ }
+ if (!(x & 0xFFFF000000000000ULL)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xFF00000000000000ULL)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xF000000000000000ULL)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xC000000000000000ULL)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x8000000000000000ULL)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+static __attribute__((unused))
+unsigned int fls_u32(uint32_t x)
+{
+ unsigned int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xFFFF0000U)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xFF000000U)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xF0000000U)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xC0000000U)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000U)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+static
+unsigned int fls_ulong(unsigned long x)
+{
+#if RSEQ_BITS_PER_LONG == 32
+ return fls_u32(x);
+#else
+ return fls_u64(x);
+#endif
+}
+
+/*
+ * Return the minimum order for which x <= (1UL << order).
+ * Return -1 if x is 0.
+ */
+static
+int get_count_order_ulong(unsigned long x)
+{
+ if (!x)
+ return -1;
+
+ return fls_ulong(x - 1);
+}
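/*
 * Editorial note (not part of this patch): get_count_order_ulong() is
 * what rounds item_len up to its power-of-two size class. For example,
 * get_count_order_ulong(24) == fls_ulong(23) == 5, so a 24-byte item is
 * placed in the order-5 (32-byte) class, while an exact power of two
 * such as 32 keeps its own order (fls_ulong(31) == 5).
 */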
+
+struct rseq_percpu_pool *rseq_percpu_pool_create(size_t item_len,
size_t percpu_len, int max_nr_cpus,
int prot, int flags, int fd, off_t offset)
{
struct rseq_percpu_pool *pool;
void *base;
- int i;
+ int i, order;
+ long page_len;
+
+ /* Make sure each item is large enough to contain free list pointers. */
+ if (item_len < sizeof(void *))
+ item_len = sizeof(void *);
+
+ /* Align item_len on next power of two. */
+ order = get_count_order_ulong(item_len);
+ if (order < 0) {
+ errno = EINVAL;
+ return NULL;
+ }
+ item_len = 1UL << order;
+
+ /* Align percpu_len on page size. */
+ page_len = sysconf(_SC_PAGE_SIZE);
+ if (page_len < 0)
+ page_len = DEFAULT_PAGE_SIZE;
+ percpu_len = rseq_align(percpu_len, page_len);
if (max_nr_cpus < 0 || item_len > percpu_len ||
percpu_len > (UINTPTR_MAX >> OFFSET_SHIFT)) {
errno = EINVAL;
- return MAP_FAILED;
+ return NULL;
}
pthread_mutex_lock(&pool_lock);
@@ -73,13 +215,13 @@ struct rseq_percpu_pool *rseq_percpu_poll_create(size_t item_len,
goto found_empty;
}
errno = ENOMEM;
- pool = MAP_FAILED;
+ pool = NULL;
goto end;
found_empty:
base = mmap(NULL, percpu_len * max_nr_cpus, prot, flags, fd, offset);
if (base == MAP_FAILED) {
- pool = MAP_FAILED;
+ pool = NULL;
goto end;
}
// TODO: integrate with libnuma to provide NUMA placement hints.
@@ -88,12 +230,14 @@ found_empty:
pool->base = base;
pool->percpu_len = percpu_len;
pool->index = i;
+ pool->item_len = item_len;
+ pool->item_order = order;
end:
pthread_mutex_unlock(&pool_lock);
return pool;
}
-int rseq_percpu_poll_destroy(struct rseq_percpu_pool *pool)
+int rseq_percpu_pool_destroy(struct rseq_percpu_pool *pool)
{
int ret;
@@ -168,7 +312,7 @@ void *__rseq_percpu_malloc(struct rseq_percpu_pool *pool, bool zeroed)
end:
pthread_mutex_unlock(&pool->lock);
if (zeroed && addr)
- rseq_percpu_zero_offset(pool, offset_offset);
+ rseq_percpu_zero_offset(pool, item_offset);
return addr;
}
@@ -198,3 +342,99 @@ void rseq_percpu_free(void *ptr)
pool->free_list_head = item;
pthread_mutex_unlock(&pool->lock);
}
+
+struct rseq_percpu_pool_set *rseq_percpu_pool_set_create(void)
+{
+ struct rseq_percpu_pool_set *pool_set;
+
+ pool_set = calloc(1, sizeof(struct rseq_percpu_pool_set));
+ if (!pool_set)
+ return NULL;
+ pthread_mutex_init(&pool_set->lock, NULL);
+ return pool_set;
+}
+
+int rseq_percpu_pool_set_destroy(struct rseq_percpu_pool_set *pool_set)
+{
+ int order, ret;
+
+ for (order = 0; order < POOL_SET_NR_ENTRIES; order++) {
+ struct rseq_percpu_pool *pool = pool_set->entries[order];
+
+ if (!pool)
+ continue;
+ ret = rseq_percpu_pool_destroy(pool);
+ if (ret)
+ abort();
+ }
+ pthread_mutex_destroy(&pool_set->lock);
+ free(pool_set);
+ return 0;
+}
+
+/* Ownership of pool is handed over to pool set on success. */
+int rseq_percpu_pool_set_add_pool(struct rseq_percpu_pool_set *pool_set, struct rseq_percpu_pool *pool)
+{
+ size_t item_order = pool->item_order;
+ int ret = 0;
+
+ pthread_mutex_lock(&pool_set->lock);
+ if (pool_set->entries[item_order]) {
+ errno = EBUSY;
+ ret = -1;
+ goto end;
+ }
+ pool_set->entries[item_order] = pool;
+end:
+ pthread_mutex_unlock(&pool_set->lock);
+ return ret;
+}
+
+static
+void *__rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len, bool zeroed)
+{
+ struct rseq_percpu_pool *pool;
+ void *addr;
+ int order, min_order = POOL_SET_MIN_ENTRY;
+
+again:
+ pthread_mutex_lock(&pool_set->lock);
+ /* First smallest present pool where @len fits. */
+ for (order = min_order; order < POOL_SET_NR_ENTRIES; order++) {
+ pool = pool_set->entries[order];
+
+ if (!pool)
+ continue;
+ if (pool->item_len >= len)
+ goto found;
+ }
+ pool = NULL;
+found:
+ pthread_mutex_unlock(&pool_set->lock);
+ if (pool) {
+ addr = __rseq_percpu_malloc(pool, zeroed);
+ if (addr == NULL && errno == ENOMEM) {
+ /*
+ * If the allocation failed, try again with a
+ * larger pool.
+ */
+ min_order = order + 1;
+ goto again;
+ }
+ } else {
+ /* Not found. */
+ errno = ENOMEM;
+ addr = NULL;
+ }
+ return addr;
+}
+
+void *rseq_percpu_pool_set_malloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+ return __rseq_percpu_pool_set_malloc(pool_set, len, false);
+}
+
+void *rseq_percpu_pool_set_zmalloc(struct rseq_percpu_pool_set *pool_set, size_t len)
+{
+ return __rseq_percpu_pool_set_malloc(pool_set, len, true);
+}
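
A minimal sketch of how the pool set API added by this patch might be used. It is not part of the patch: the item sizes, the 64 kB per-CPU length, the 64-CPU limit and the mmap flags are illustrative assumptions. Requests are served from the smallest registered pool whose item_len fits, falling back to the next larger pool if that pool runs out of items.

	struct rseq_percpu_pool_set *set;
	struct rseq_percpu_pool *pool;
	size_t sizes[] = { 32, 128, 1024 };
	size_t i;
	void *p;

	set = rseq_percpu_pool_set_create();
	if (!set)
		abort();
	/* Register one pool per size class; ownership moves to the set on success. */
	for (i = 0; i < 3; i++) {
		pool = rseq_percpu_pool_create(sizes[i], 1UL << 16, 64,
				PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		if (!pool || rseq_percpu_pool_set_add_pool(set, pool))
			abort();
	}
	/* A 100-byte request is served from the 128-byte (order-7) pool. */
	p = rseq_percpu_pool_set_zmalloc(set, 100);
	if (!p)
		abort();
	rseq_percpu_free(p);
	/* Destroying the set also destroys the pools it owns. */
	if (rseq_percpu_pool_set_destroy(set))
		abort();
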