From 7034ac58dbaafff9a12b61f813ec620ace39c917 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 3 Jul 2009 08:44:43 -0500
Subject: [PATCH] mm: More lock breaks in slab.c

commit 4672c44017f4f0545122dfefd09cdd68c0c848ff in tip.

Handle __free_pages() outside of the locked regions. This significantly
reduces contention on the per-CPU slab locks in -rt.
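
As an illustration of the pattern (not part of the change itself), a
minimal sketch of the deferred-free scheme is shown below. The lock,
list and function names are made up for the example, but the
splice-then-free shape and the use of page->index to carry the
allocation order match what the hunks below do:

	/*
	 * Sketch only: queue pages on a private list while the lock is
	 * held, then hand them to __free_pages() after dropping it.
	 */
	#include <linux/list.h>
	#include <linux/mm.h>
	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(deferred_lock);	/* illustrative name */
	static LIST_HEAD(deferred_pages);	/* illustrative name */

	static void defer_free_example(void)
	{
		LIST_HEAD(list);

		spin_lock(&deferred_lock);
		/* Under the lock: only splice, never free. */
		list_splice_init(&deferred_pages, &list);
		spin_unlock(&deferred_lock);

		/* The potentially expensive frees happen outside the lock. */
		while (!list_empty(&list)) {
			struct page *page =
				list_first_entry(&list, struct page, lru);

			list_del(&page->lru);
			__free_pages(page, page->index);
		}
	}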

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
---
 mm/slab.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ac90a26..e939a38 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -180,28 +180,46 @@ static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu)
  * by a lock. This keeps the code preemptable - albeit at the cost of remote
  * memory access when the task does get migrated away.
  */
-DEFINE_PER_CPU_LOCKED(int, slab_irq_locks) = { 0, };
+DEFINE_PER_CPU_LOCKED(struct list_head, slab) = { 0, };
 
 static void _slab_irq_disable(int *cpu)
 {
-	get_cpu_var_locked(slab_irq_locks, cpu);
+	(void)get_cpu_var_locked(slab, cpu);
 }
 
 #define slab_irq_disable(cpu) _slab_irq_disable(&(cpu))
 
 static inline void slab_irq_enable(int cpu)
 {
-	put_cpu_var_locked(slab_irq_locks, cpu);
+	LIST_HEAD(list);
+
+	list_splice_init(&__get_cpu_var_locked(slab, cpu), &list);
+	put_cpu_var_locked(slab, cpu);
+
+	while (!list_empty(&list)) {
+		struct page *page = list_first_entry(&list, struct page, lru);
+		list_del(&page->lru);
+		__free_pages(page, page->index);
+	}
 }
 
 static inline void slab_irq_disable_this_rt(int cpu)
 {
-	spin_lock(&__get_cpu_lock(slab_irq_locks, cpu));
+	spin_lock(&__get_cpu_lock(slab, cpu));
 }
 
 static inline void slab_irq_enable_rt(int cpu)
 {
-	spin_unlock(&__get_cpu_lock(slab_irq_locks, cpu));
+	LIST_HEAD(list);
+
+	list_splice_init(&__get_cpu_var_locked(slab, cpu), &list);
+	spin_unlock(&__get_cpu_lock(slab, cpu));
+
+	while (!list_empty(&list)) {
+		struct page *page = list_first_entry(&list, struct page, lru);
+		list_del(&page->lru);
+		__free_pages(page, page->index);
+	}
 }
 
 # define slab_irq_save(flags, cpu) \
@@ -1517,6 +1535,12 @@ void __init kmem_cache_init(void)
 	int order;
 	int node;
 
+#ifdef CONFIG_PREEMPT_RT
+	for_each_possible_cpu(i) {
+		INIT_LIST_HEAD(&__get_cpu_var_locked(slab, i));
+	}
+#endif
+
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
@@ -1791,12 +1815,14 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 /*
  * Interface to system's page release.
  */
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, void *addr, int cpu)
 {
 	unsigned long i = (1 << cachep->gfporder);
-	struct page *page = virt_to_page(addr);
+	struct page *page, *basepage = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	page = basepage;
+
 	kmemcheck_free_shadow(page, cachep->gfporder);
 
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1805,6 +1831,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	else
 		sub_zone_page_state(page_zone(page),
 				NR_SLAB_UNRECLAIMABLE, nr_freed);
+
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
@@ -1812,6 +1839,13 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	}
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
+
+#ifdef CONFIG_PREEMPT_RT
+	if (cpu >= 0) {
+		basepage->index = cachep->gfporder;
+		list_add(&basepage->lru, &__get_cpu_var_locked(slab, cpu));
+	} else
+#endif
 	free_pages((unsigned long)addr, cachep->gfporder);
 }
 
@@ -1820,7 +1854,7 @@ static void kmem_rcu_free(struct rcu_head *head)
 	struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
 	struct kmem_cache *cachep = slab_rcu->cachep;
 
-	kmem_freepages(cachep, slab_rcu->addr);
+	kmem_freepages(cachep, slab_rcu->addr, -1);
 	if (OFF_SLAB(cachep))
 		kmem_cache_free(cachep->slabp_cache, slab_rcu);
 }
@@ -2057,7 +2091,7 @@ slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu)
 		slab_rcu->addr = addr;
 		call_rcu(&slab_rcu->head, kmem_rcu_free);
 	} else {
-		kmem_freepages(cachep, addr);
+		kmem_freepages(cachep, addr, *this_cpu);
 		if (OFF_SLAB(cachep)) {
 			if (this_cpu)
 				__cache_free(cachep->slabp_cache, slabp, this_cpu);
@@ -2595,9 +2629,9 @@ slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg)
 
 	check_irq_on();
 	for_each_online_cpu(i) {
-		spin_lock(&__get_cpu_lock(slab_irq_locks, i));
+		spin_lock(&__get_cpu_lock(slab, i));
 		func(arg, i);
-		spin_unlock(&__get_cpu_lock(slab_irq_locks, i));
+		spin_unlock(&__get_cpu_lock(slab, i));
 	}
 }
 #else
@@ -2988,7 +3022,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	spin_unlock(&l3->list_lock);
 	return 1;
 opps1:
-	kmem_freepages(cachep, objp);
+	kmem_freepages(cachep, objp, -1);
 failed:
 	slab_irq_disable_GFP_WAIT(local_flags, this_cpu);
 	return 0;
-- 
1.7.0.4