aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Stephen Rothwell <sfr@canb.auug.org.au> 2024-05-02 15:38:01 +1000
committer Stephen Rothwell <sfr@canb.auug.org.au> 2024-05-02 15:38:01 +1000
commit 968f57e5a6ef801c9507589638d079aa6c5e587f (patch)
tree b2a3f28b0b58344fddfcb327b2376310abb80a6d
parent 6b45f49e0bb38f3e22e5d0cd171db02dd775398d (diff)
parent 7ea716a17f1f3ba7356f36cd340b3efaaa3e0653 (diff)
download linux-next-968f57e5a6ef801c9507589638d079aa6c5e587f.tar.gz
Merge branch 'slab/for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/vbabka/slab.git
Notice: this object is not reachable from any branch.
Notice: this object is not reachable from any branch.
-rw-r--r-- include/linux/slab.h | 4
-rw-r--r-- lib/slub_kunit.c | 2
-rw-r--r-- mm/slab.h | 3
-rw-r--r-- mm/slab_common.c | 2
-rw-r--r-- mm/slub.c | 168
5 files changed, 117 insertions, 62 deletions
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 88426b015faae..cb4559c36e123 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -279,7 +279,7 @@ void kfree(const void *objp);
void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
-DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
+DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
/**
* ksize - Report actual allocation size of associated object
@@ -822,7 +822,7 @@ extern void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp
#define kvrealloc(...) alloc_hooks(kvrealloc_noprof(__VA_ARGS__))
extern void kvfree(const void *addr);
-DEFINE_FREE(kvfree, void *, if (_T) kvfree(_T))
+DEFINE_FREE(kvfree, void *, if (!IS_ERR_OR_NULL(_T)) kvfree(_T))
extern void kvfree_sensitive(const void *addr, size_t len);
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index d4a3730b08fa7..4ce9604388069 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -55,7 +55,7 @@ static void test_next_pointer(struct kunit *test)
ptr_addr = (unsigned long *)(p + s->offset);
tmp = *ptr_addr;
- p[s->offset] = 0x12;
+ p[s->offset] = ~p[s->offset];
/*
* Expecting three errors.
diff --git a/mm/slab.h b/mm/slab.h
index d12fb4392e353..5f8f47c5bee0a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -496,9 +496,6 @@ struct slabinfo {
};
void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo);
-void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s);
-ssize_t slabinfo_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos);
#ifdef CONFIG_SLUB_DEBUG
#ifdef CONFIG_SLUB_DEBUG_ON
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3179a6aeffc56..8664da88e8439 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1078,7 +1078,6 @@ static void cache_show(struct kmem_cache *s, struct seq_file *m)
sinfo.limit, sinfo.batchcount, sinfo.shared);
seq_printf(m, " : slabdata %6lu %6lu %6lu",
sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
- slabinfo_show_stats(m, s);
seq_putc(m, '\n');
}
@@ -1155,7 +1154,6 @@ static const struct proc_ops slabinfo_proc_ops = {
.proc_flags = PROC_ENTRY_PERMANENT,
.proc_open = slabinfo_open,
.proc_read = seq_read,
- .proc_write = slabinfo_write,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
diff --git a/mm/slub.c b/mm/slub.c
index 3e33ff900d35e..9a21ec7a2ee9f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -557,6 +557,26 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
*(freeptr_t *)freeptr_addr = freelist_ptr_encode(s, fp, freeptr_addr);
}
+/*
+ * See comment in calculate_sizes().
+ */
+static inline bool freeptr_outside_object(struct kmem_cache *s)
+{
+ return s->offset >= s->inuse;
+}
+
+/*
+ * Return offset of the end of info block which is inuse + free pointer if
+ * not overlapping with object.
+ */
+static inline unsigned int get_info_end(struct kmem_cache *s)
+{
+ if (freeptr_outside_object(s))
+ return s->inuse + sizeof(void *);
+ else
+ return s->inuse;
+}
+
/* Loop over all objects in a slab */
#define for_each_object(__p, __s, __addr, __objects) \
for (__p = fixup_red_left(__s, __addr); \
@@ -604,11 +624,21 @@ static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
nr_slabs = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
s->cpu_partial_slabs = nr_slabs;
}
+
+static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
+{
+ return s->cpu_partial_slabs;
+}
#else
static inline void
slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
{
}
+
+static inline unsigned int slub_get_cpu_partial(struct kmem_cache *s)
+{
+ return 0;
+}
#endif /* CONFIG_SLUB_CPU_PARTIAL */
/*
@@ -839,26 +869,6 @@ static void print_section(char *level, char *text, u8 *addr,
metadata_access_disable();
}
-/*
- * See comment in calculate_sizes().
- */
-static inline bool freeptr_outside_object(struct kmem_cache *s)
-{
- return s->offset >= s->inuse;
-}
-
-/*
- * Return offset of the end of info block which is inuse + free pointer if
- * not overlapping with object.
- */
-static inline unsigned int get_info_end(struct kmem_cache *s)
-{
- if (freeptr_outside_object(s))
- return s->inuse + sizeof(void *);
- else
- return s->inuse;
-}
-
static struct track *get_track(struct kmem_cache *s, void *object,
enum track_item alloc)
{
@@ -2166,15 +2176,20 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
*
* The initialization memset's clear the object and the metadata,
* but don't touch the SLAB redzone.
+ *
+ * The object's freepointer is also avoided if stored outside the
+ * object.
*/
if (unlikely(init)) {
int rsize;
+ unsigned int inuse;
+ inuse = get_info_end(s);
if (!kasan_has_integrated_init())
memset(kasan_reset_tag(x), 0, s->object_size);
rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
- memset((char *)kasan_reset_tag(x) + s->inuse, 0,
- s->size - s->inuse - rsize);
+ memset((char *)kasan_reset_tag(x) + inuse, 0,
+ s->size - inuse - rsize);
}
/* KASAN might put x into memory quarantine, delaying its reuse. */
return !kasan_slab_free(s, x, init);
@@ -2678,19 +2693,18 @@ static struct slab *get_partial_node(struct kmem_cache *s,
if (!partial) {
partial = slab;
stat(s, ALLOC_FROM_PARTIAL);
+
+ if ((slub_get_cpu_partial(s) == 0)) {
+ break;
+ }
} else {
put_cpu_partial(s, slab, 0);
stat(s, CPU_PARTIAL_NODE);
- partial_slabs++;
- }
-#ifdef CONFIG_SLUB_CPU_PARTIAL
- if (!kmem_cache_has_cpu_partial(s)
- || partial_slabs > s->cpu_partial_slabs / 2)
- break;
-#else
- break;
-#endif
+ if (++partial_slabs > slub_get_cpu_partial(s) / 2) {
+ break;
+ }
+ }
}
spin_unlock_irqrestore(&n->list_lock, flags);
return partial;
@@ -2773,7 +2787,7 @@ static struct slab *get_partial(struct kmem_cache *s, int node,
searchnode = numa_mem_id();
slab = get_partial_node(s, get_node(s, searchnode), pc);
- if (slab || node != NUMA_NO_NODE)
+ if (slab || (node != NUMA_NO_NODE && (pc->flags & __GFP_THISNODE)))
return slab;
return get_any_partial(s, pc);
@@ -3303,6 +3317,43 @@ static unsigned long count_partial(struct kmem_cache_node *n,
#endif /* CONFIG_SLUB_DEBUG || SLAB_SUPPORTS_SYSFS */
#ifdef CONFIG_SLUB_DEBUG
+#define MAX_PARTIAL_TO_SCAN 10000
+
+static unsigned long count_partial_free_approx(struct kmem_cache_node *n)
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct slab *slab;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ if (n->nr_partial <= MAX_PARTIAL_TO_SCAN) {
+ list_for_each_entry(slab, &n->partial, slab_list)
+ x += slab->objects - slab->inuse;
+ } else {
+ /*
+ * For a long list, approximate the total count of objects in
+ * it to meet the limit on the number of slabs to scan.
+ * Scan from both the list's head and tail for better accuracy.
+ */
+ unsigned long scanned = 0;
+
+ list_for_each_entry(slab, &n->partial, slab_list) {
+ x += slab->objects - slab->inuse;
+ if (++scanned == MAX_PARTIAL_TO_SCAN / 2)
+ break;
+ }
+ list_for_each_entry_reverse(slab, &n->partial, slab_list) {
+ x += slab->objects - slab->inuse;
+ if (++scanned == MAX_PARTIAL_TO_SCAN)
+ break;
+ }
+ x = mult_frac(x, n->nr_partial, scanned);
+ x = min(x, node_nr_objs(n));
+ }
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
static noinline void
slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
{
@@ -3329,7 +3380,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
unsigned long nr_objs;
unsigned long nr_free;
- nr_free = count_partial(n, count_free);
+ nr_free = count_partial_free_approx(n);
nr_slabs = node_nr_slabs(n);
nr_objs = node_nr_objs(n);
@@ -3449,6 +3500,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
struct slab *slab;
unsigned long flags;
struct partial_context pc;
+ bool try_thisnode = true;
stat(s, ALLOC_SLOWPATH);
@@ -3575,6 +3627,21 @@ new_slab:
new_objects:
pc.flags = gfpflags;
+ /*
+ * When a preferred node is indicated but no __GFP_THISNODE
+ *
+ * 1) try to get a partial slab from target node only by having
+ * __GFP_THISNODE in pc.flags for get_partial()
+ * 2) if 1) failed, try to allocate a new slab from target node with
+ * GFP_NOWAIT | __GFP_THISNODE opportunistically
+ * 3) if 2) failed, retry with original gfpflags which will allow
+ * get_partial() to try partial lists of other nodes before potentially
+ * allocating a new page from other nodes
+ */
+ if (unlikely(node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
+ && try_thisnode))
+ pc.flags = GFP_NOWAIT | __GFP_THISNODE;
+
pc.orig_size = orig_size;
slab = get_partial(s, node, &pc);
if (slab) {
@@ -3596,10 +3663,15 @@ new_objects:
}
slub_put_cpu_ptr(s->cpu_slab);
- slab = new_slab(s, gfpflags, node);
+ slab = new_slab(s, pc.flags, node);
c = slub_get_cpu_ptr(s->cpu_slab);
if (unlikely(!slab)) {
+ if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
+ && try_thisnode) {
+ try_thisnode = false;
+ goto new_objects;
+ }
slab_out_of_memory(s, gfpflags, node);
return NULL;
}
@@ -3796,7 +3868,8 @@ static void *__slab_alloc_node(struct kmem_cache *s,
static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
void *obj)
{
- if (unlikely(slab_want_init_on_free(s)) && obj)
+ if (unlikely(slab_want_init_on_free(s)) && obj &&
+ !freeptr_outside_object(s))
memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
0, sizeof(void *));
}
@@ -4308,7 +4381,7 @@ redo:
c = raw_cpu_ptr(s->cpu_slab);
tid = READ_ONCE(c->tid);
- /* Same with comment on barrier() in slab_alloc_node() */
+ /* Same as the comment on barrier() in __slab_alloc_node() */
barrier();
if (unlikely(slab != c->slab)) {
@@ -4938,7 +5011,6 @@ static void early_kmem_cache_node_alloc(int node)
BUG_ON(!n);
#ifdef CONFIG_SLUB_DEBUG
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
- init_tracking(kmem_cache_node, n);
#endif
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
slab->freelist = get_freepointer(kmem_cache_node, n);
@@ -5151,9 +5223,7 @@ static int calculate_sizes(struct kmem_cache *s)
if ((int)order < 0)
return 0;
- s->allocflags = 0;
- if (order)
- s->allocflags |= __GFP_COMP;
+ s->allocflags = __GFP_COMP;
if (s->flags & SLAB_CACHE_DMA)
s->allocflags |= GFP_DMA;
@@ -6128,7 +6198,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
else if (flags & SO_OBJECTS)
WARN_ON_ONCE(1);
else
- x = slab->slabs;
+ x = data_race(slab->slabs);
total += x;
nodes[node] += x;
}
@@ -6333,7 +6403,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab)
- slabs += slab->slabs;
+ slabs += data_race(slab->slabs);
}
#endif
@@ -6347,7 +6417,7 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
slab = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
if (slab) {
- slabs = READ_ONCE(slab->slabs);
+ slabs = data_race(slab->slabs);
objects = (slabs * oo_objects(s->oo)) / 2;
len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
cpu, objects, slabs);
@@ -7181,7 +7251,7 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
for_each_kmem_cache_node(s, node, n) {
nr_slabs += node_nr_slabs(n);
nr_objs += node_nr_objs(n);
- nr_free += count_partial(n, count_free);
+ nr_free += count_partial_free_approx(n);
}
sinfo->active_objs = nr_objs - nr_free;
@@ -7191,14 +7261,4 @@ void get_slabinfo(struct kmem_cache *s, struct slabinfo *sinfo)
sinfo->objects_per_slab = oo_objects(s->oo);
sinfo->cache_order = oo_order(s->oo);
}
-
-void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *s)
-{
-}
-
-ssize_t slabinfo_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- return -EIO;
-}
#endif /* CONFIG_SLUB_DEBUG */