aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 13:04:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-29 13:04:15 -0700
commit651a00bc56403161351090a9d7ddbd7095975324 (patch)
tree921e3e06b419384d76b8ebd7c08b610dbe0d6b7b
parent9d6b14cd1e993d2ff98df0cef6d935ce6fd4dbec (diff)
parent3d053e8060430b86bad0854b7c7f03f15be3a7e5 (diff)
downloadcloudkernel-651a00bc56403161351090a9d7ddbd7095975324.tar.gz
Merge tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab
Pull slab updates from Vlastimil Babka: "This happens to be a small one (due to summer I guess), and all hardening related: - Randomized kmalloc caches, by GONG, Ruiqi. A new opt-in hardening feature to make heap spraying harder. It creates multiple (16) copies of kmalloc caches, reducing the chance of an attacker-controllable allocation site to land in the same slab as e.g. an allocation site with use-after-free vulnerability. The selection of the copy is derived from the allocation site address, including a per-boot random seed. - Stronger typing for hardened freelists in SLUB, by Jann Horn Introduces a custom type for hardened freelist entries instead of "void *" as those are not directly dereferencable. While reviewing this, I've noticed opportunities for further cleanups in that code and added those on top" * tag 'slab-for-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab: Randomized slab caches for kmalloc() mm/slub: remove freelist_dereference() mm/slub: remove redundant kasan_reset_tag() from freelist_ptr calculations mm/slub: refactor freelist to use custom type
-rw-r--r--include/linux/percpu.h12
-rw-r--r--include/linux/slab.h23
-rw-r--r--mm/Kconfig17
-rw-r--r--mm/kfence/kfence_test.c7
-rw-r--r--mm/slab.c2
-rw-r--r--mm/slab.h2
-rw-r--r--mm/slab_common.c49
-rw-r--r--mm/slub.c58
8 files changed, 130 insertions, 40 deletions
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index b3b4584423301..68fac2e7cbe67 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -35,6 +35,12 @@
#define PCPU_BITMAP_BLOCK_BITS (PCPU_BITMAP_BLOCK_SIZE >> \
PCPU_MIN_ALLOC_SHIFT)
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define PERCPU_DYNAMIC_SIZE_SHIFT 12
+#else
+#define PERCPU_DYNAMIC_SIZE_SHIFT 10
+#endif
+
/*
* Percpu allocator can serve percpu allocations before slab is
* initialized which allows slab to depend on the percpu allocator.
@@ -42,7 +48,7 @@
* for this. Keep PERCPU_DYNAMIC_RESERVE equal to or larger than
* PERCPU_DYNAMIC_EARLY_SIZE.
*/
-#define PERCPU_DYNAMIC_EARLY_SIZE (20 << 10)
+#define PERCPU_DYNAMIC_EARLY_SIZE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
@@ -56,9 +62,9 @@
* intelligent way to determine this would be nice.
*/
#if BITS_PER_LONG > 32
-#define PERCPU_DYNAMIC_RESERVE (28 << 10)
+#define PERCPU_DYNAMIC_RESERVE (28 << PERCPU_DYNAMIC_SIZE_SHIFT)
#else
-#define PERCPU_DYNAMIC_RESERVE (20 << 10)
+#define PERCPU_DYNAMIC_RESERVE (20 << PERCPU_DYNAMIC_SIZE_SHIFT)
#endif
extern void *pcpu_base_addr;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 848c7c82ad5ad..8228d1276a2f6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -19,6 +19,7 @@
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
+#include <linux/hash.h>
/*
@@ -345,6 +346,12 @@ static inline unsigned int arch_slab_minalign(void)
#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \
(KMALLOC_MIN_SIZE) : 16)
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define RANDOM_KMALLOC_CACHES_NR 15 // # of cache copies
+#else
+#define RANDOM_KMALLOC_CACHES_NR 0
+#endif
+
/*
* Whenever changing this, take care of that kmalloc_type() and
* create_kmalloc_caches() still work as intended.
@@ -361,6 +368,8 @@ enum kmalloc_cache_type {
#ifndef CONFIG_MEMCG_KMEM
KMALLOC_CGROUP = KMALLOC_NORMAL,
#endif
+ KMALLOC_RANDOM_START = KMALLOC_NORMAL,
+ KMALLOC_RANDOM_END = KMALLOC_RANDOM_START + RANDOM_KMALLOC_CACHES_NR,
#ifdef CONFIG_SLUB_TINY
KMALLOC_RECLAIM = KMALLOC_NORMAL,
#else
@@ -386,14 +395,22 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
(IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
(IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
-static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+extern unsigned long random_kmalloc_seed;
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags, unsigned long caller)
{
/*
* The most common case is KMALLOC_NORMAL, so test for it
* with a single branch for all the relevant flags.
*/
if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+ /* RANDOM_KMALLOC_CACHES_NR (=15) copies + the KMALLOC_NORMAL */
+ return KMALLOC_RANDOM_START + hash_64(caller ^ random_kmalloc_seed,
+ ilog2(RANDOM_KMALLOC_CACHES_NR + 1));
+#else
return KMALLOC_NORMAL;
+#endif
/*
* At least one of the flags has to be set. Their priorities in
@@ -580,7 +597,7 @@ static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
index = kmalloc_index(size);
return kmalloc_trace(
- kmalloc_caches[kmalloc_type(flags)][index],
+ kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, size);
}
return __kmalloc(size, flags);
@@ -596,7 +613,7 @@ static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t fla
index = kmalloc_index(size);
return kmalloc_node_trace(
- kmalloc_caches[kmalloc_type(flags)][index],
+ kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index],
flags, node, size);
}
return __kmalloc_node(size, flags, node);
diff --git a/mm/Kconfig b/mm/Kconfig
index 09130434e30d3..4bf7dc5ae5ef9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -337,6 +337,23 @@ config SLUB_CPU_PARTIAL
which requires the taking of locks that may cause latency spikes.
Typically one would choose no for a realtime system.
+config RANDOM_KMALLOC_CACHES
+ default n
+ depends on SLUB && !SLUB_TINY
+ bool "Randomize slab caches for normal kmalloc"
+ help
+ A hardening feature that creates multiple copies of slab caches for
+ normal kmalloc allocation and makes kmalloc randomly pick one based
+ on code address, which makes the attackers more difficult to spray
+ vulnerable memory objects on the heap for the purpose of exploiting
+ memory vulnerabilities.
+
+ Currently the number of copies is set to 16, a reasonably large value
+ that effectively diverges the memory objects allocated for different
+ subsystems or modules into different caches, at the expense of a
+ limited degree of memory and CPU overhead that relates to hardware and
+ system workload.
+
endmenu # SLAB allocator options
config SHUFFLE_PAGE_ALLOCATOR
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 9e008a336d9f7..95b2b84c296d0 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -212,7 +212,9 @@ static void test_cache_destroy(void)
static inline size_t kmalloc_cache_alignment(size_t size)
{
- return kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)]->align;
+ /* just to get ->align so no need to pass in the real caller */
+ enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, 0);
+ return kmalloc_caches[type][__kmalloc_index(size, false)]->align;
}
/* Must always inline to match stack trace against caller. */
@@ -282,8 +284,9 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat
if (is_kfence_address(alloc)) {
struct slab *slab = virt_to_slab(alloc);
+ enum kmalloc_cache_type type = kmalloc_type(GFP_KERNEL, _RET_IP_);
struct kmem_cache *s = test_cache ?:
- kmalloc_caches[kmalloc_type(GFP_KERNEL)][__kmalloc_index(size, false)];
+ kmalloc_caches[type][__kmalloc_index(size, false)];
/*
* Verify that various helpers return the right values
diff --git a/mm/slab.c b/mm/slab.c
index 88194391d5531..9ad3d0f2d1a5e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1670,7 +1670,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
if (freelist_size > KMALLOC_MAX_CACHE_SIZE) {
freelist_cache_size = PAGE_SIZE << get_order(freelist_size);
} else {
- freelist_cache = kmalloc_slab(freelist_size, 0u);
+ freelist_cache = kmalloc_slab(freelist_size, 0u, _RET_IP_);
if (!freelist_cache)
continue;
freelist_cache_size = freelist_cache->size;
diff --git a/mm/slab.h b/mm/slab.h
index 9c0e09d0f81f0..799a315695c67 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -282,7 +282,7 @@ void setup_kmalloc_cache_index_table(void);
void create_kmalloc_caches(slab_flags_t);
/* Find the kmalloc slab corresponding for a certain size */
-struct kmem_cache *kmalloc_slab(size_t, gfp_t);
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller);
void *__kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags,
int node, size_t orig_size,
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d1555ea2981ac..01cdbf1224636 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -678,6 +678,11 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init =
{ /* initialization for https://bugs.llvm.org/show_bug.cgi?id=42570 */ };
EXPORT_SYMBOL(kmalloc_caches);
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+unsigned long random_kmalloc_seed __ro_after_init;
+EXPORT_SYMBOL(random_kmalloc_seed);
+#endif
+
/*
* Conversion table for small slabs sizes / 8 to the index in the
* kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -720,7 +725,7 @@ static inline unsigned int size_index_elem(unsigned int bytes)
* Find the kmem_cache structure that serves a given size of
* allocation
*/
-struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
+struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags, unsigned long caller)
{
unsigned int index;
@@ -735,7 +740,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
index = fls(size - 1);
}
- return kmalloc_caches[kmalloc_type(flags)][index];
+ return kmalloc_caches[kmalloc_type(flags, caller)][index];
}
size_t kmalloc_size_roundup(size_t size)
@@ -752,8 +757,11 @@ size_t kmalloc_size_roundup(size_t size)
if (size > KMALLOC_MAX_CACHE_SIZE)
return PAGE_SIZE << get_order(size);
- /* The flags don't matter since size_index is common to all. */
- c = kmalloc_slab(size, GFP_KERNEL);
+ /*
+ * The flags don't matter since size_index is common to all.
+ * Neither does the caller for just getting ->object_size.
+ */
+ c = kmalloc_slab(size, GFP_KERNEL, 0);
return c ? c->object_size : 0;
}
EXPORT_SYMBOL(kmalloc_size_roundup);
@@ -776,12 +784,35 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
#define KMALLOC_RCL_NAME(sz)
#endif
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+#define __KMALLOC_RANDOM_CONCAT(a, b) a ## b
+#define KMALLOC_RANDOM_NAME(N, sz) __KMALLOC_RANDOM_CONCAT(KMA_RAND_, N)(sz)
+#define KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 1] = "kmalloc-rnd-01-" #sz,
+#define KMA_RAND_2(sz) KMA_RAND_1(sz) .name[KMALLOC_RANDOM_START + 2] = "kmalloc-rnd-02-" #sz,
+#define KMA_RAND_3(sz) KMA_RAND_2(sz) .name[KMALLOC_RANDOM_START + 3] = "kmalloc-rnd-03-" #sz,
+#define KMA_RAND_4(sz) KMA_RAND_3(sz) .name[KMALLOC_RANDOM_START + 4] = "kmalloc-rnd-04-" #sz,
+#define KMA_RAND_5(sz) KMA_RAND_4(sz) .name[KMALLOC_RANDOM_START + 5] = "kmalloc-rnd-05-" #sz,
+#define KMA_RAND_6(sz) KMA_RAND_5(sz) .name[KMALLOC_RANDOM_START + 6] = "kmalloc-rnd-06-" #sz,
+#define KMA_RAND_7(sz) KMA_RAND_6(sz) .name[KMALLOC_RANDOM_START + 7] = "kmalloc-rnd-07-" #sz,
+#define KMA_RAND_8(sz) KMA_RAND_7(sz) .name[KMALLOC_RANDOM_START + 8] = "kmalloc-rnd-08-" #sz,
+#define KMA_RAND_9(sz) KMA_RAND_8(sz) .name[KMALLOC_RANDOM_START + 9] = "kmalloc-rnd-09-" #sz,
+#define KMA_RAND_10(sz) KMA_RAND_9(sz) .name[KMALLOC_RANDOM_START + 10] = "kmalloc-rnd-10-" #sz,
+#define KMA_RAND_11(sz) KMA_RAND_10(sz) .name[KMALLOC_RANDOM_START + 11] = "kmalloc-rnd-11-" #sz,
+#define KMA_RAND_12(sz) KMA_RAND_11(sz) .name[KMALLOC_RANDOM_START + 12] = "kmalloc-rnd-12-" #sz,
+#define KMA_RAND_13(sz) KMA_RAND_12(sz) .name[KMALLOC_RANDOM_START + 13] = "kmalloc-rnd-13-" #sz,
+#define KMA_RAND_14(sz) KMA_RAND_13(sz) .name[KMALLOC_RANDOM_START + 14] = "kmalloc-rnd-14-" #sz,
+#define KMA_RAND_15(sz) KMA_RAND_14(sz) .name[KMALLOC_RANDOM_START + 15] = "kmalloc-rnd-15-" #sz,
+#else // CONFIG_RANDOM_KMALLOC_CACHES
+#define KMALLOC_RANDOM_NAME(N, sz)
+#endif
+
#define INIT_KMALLOC_INFO(__size, __short_size) \
{ \
.name[KMALLOC_NORMAL] = "kmalloc-" #__short_size, \
KMALLOC_RCL_NAME(__short_size) \
KMALLOC_CGROUP_NAME(__short_size) \
KMALLOC_DMA_NAME(__short_size) \
+ KMALLOC_RANDOM_NAME(RANDOM_KMALLOC_CACHES_NR, __short_size) \
.size = __size, \
}
@@ -890,6 +921,11 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
flags |= SLAB_CACHE_DMA;
}
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+ if (type >= KMALLOC_RANDOM_START && type <= KMALLOC_RANDOM_END)
+ flags |= SLAB_NO_MERGE;
+#endif
+
/*
* If CONFIG_MEMCG_KMEM is enabled, disable cache merging for
* KMALLOC_NORMAL caches.
@@ -941,6 +977,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
new_kmalloc_cache(2, type, flags);
}
}
+#ifdef CONFIG_RANDOM_KMALLOC_CACHES
+ random_kmalloc_seed = get_random_u64();
+#endif
/* Kmalloc array is now usable */
slab_state = UP;
@@ -976,7 +1015,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller
return ret;
}
- s = kmalloc_slab(size, flags);
+ s = kmalloc_slab(size, flags, caller);
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
diff --git a/mm/slub.c b/mm/slub.c
index e3b5d5c0eb3ad..f7940048138c5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -361,43 +361,51 @@ static struct workqueue_struct *flushwq;
*******************************************************************/
/*
+ * freeptr_t represents a SLUB freelist pointer, which might be encoded
+ * and not dereferenceable if CONFIG_SLAB_FREELIST_HARDENED is enabled.
+ */
+typedef struct { unsigned long v; } freeptr_t;
+
+/*
* Returns freelist pointer (ptr). With hardening, this is obfuscated
* with an XOR of the address where the pointer is held and a per-cache
* random number.
*/
-static inline void *freelist_ptr(const struct kmem_cache *s, void *ptr,
- unsigned long ptr_addr)
+static inline freeptr_t freelist_ptr_encode(const struct kmem_cache *s,
+ void *ptr, unsigned long ptr_addr)
{
+ unsigned long encoded;
+
#ifdef CONFIG_SLAB_FREELIST_HARDENED
- /*
- * When CONFIG_KASAN_SW/HW_TAGS is enabled, ptr_addr might be tagged.
- * Normally, this doesn't cause any issues, as both set_freepointer()
- * and get_freepointer() are called with a pointer with the same tag.
- * However, there are some issues with CONFIG_SLUB_DEBUG code. For
- * example, when __free_slub() iterates over objects in a cache, it
- * passes untagged pointers to check_object(). check_object() in turns
- * calls get_freepointer() with an untagged pointer, which causes the
- * freepointer to be restored incorrectly.
- */
- return (void *)((unsigned long)ptr ^ s->random ^
- swab((unsigned long)kasan_reset_tag((void *)ptr_addr)));
+ encoded = (unsigned long)ptr ^ s->random ^ swab(ptr_addr);
#else
- return ptr;
+ encoded = (unsigned long)ptr;
#endif
+ return (freeptr_t){.v = encoded};
}
-/* Returns the freelist pointer recorded at location ptr_addr. */
-static inline void *freelist_dereference(const struct kmem_cache *s,
- void *ptr_addr)
+static inline void *freelist_ptr_decode(const struct kmem_cache *s,
+ freeptr_t ptr, unsigned long ptr_addr)
{
- return freelist_ptr(s, (void *)*(unsigned long *)(ptr_addr),
- (unsigned long)ptr_addr);
+ void *decoded;
+
+#ifdef CONFIG_SLAB_FREELIST_HARDENED
+ decoded = (void *)(ptr.v ^ s->random ^ swab(ptr_addr));
+#else
+ decoded = (void *)ptr.v;
+#endif
+ return decoded;
}
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
+ unsigned long ptr_addr;
+ freeptr_t p;
+
object = kasan_reset_tag(object);
- return freelist_dereference(s, object + s->offset);
+ ptr_addr = (unsigned long)object + s->offset;
+ p = *(freeptr_t *)(ptr_addr);
+ return freelist_ptr_decode(s, p, ptr_addr);
}
#ifndef CONFIG_SLUB_TINY
@@ -421,15 +429,15 @@ __no_kmsan_checks
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
unsigned long freepointer_addr;
- void *p;
+ freeptr_t p;
if (!debug_pagealloc_enabled_static())
return get_freepointer(s, object);
object = kasan_reset_tag(object);
freepointer_addr = (unsigned long)object + s->offset;
- copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
- return freelist_ptr(s, p, freepointer_addr);
+ copy_from_kernel_nofault(&p, (freeptr_t *)freepointer_addr, sizeof(p));
+ return freelist_ptr_decode(s, p, freepointer_addr);
}
static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
@@ -441,7 +449,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
#endif
freeptr_addr = (unsigned long)kasan_reset_tag((void *)freeptr_addr);
- *(void **)freeptr_addr = freelist_ptr(s, fp, freeptr_addr);
+ *(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr);
}
/* Loop over all objects in a slab */