From e59760df865009e750972664f06ea565109a58b2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 25 Oct 2012 10:32:35 +0100
Subject: [PATCH 066/366] mm: Enable SLUB for RT

Make SLUB RT aware by converting locks to raw locks and by using free
lists to move the freeing out of the lock-held region.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 mm/slab.h |   4 ++++
 mm/slub.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 102 insertions(+), 27 deletions(-)
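
What the conversion amounts to: on PREEMPT_RT a spinlock_t is a sleeping
lock, so n->list_lock, which is taken from contexts that must not sleep,
becomes a raw_spinlock_t. A raw lock in turn forbids sleeping while it is
held, and returning slab pages to the page allocator can sleep on RT. The
patch therefore parks pages freed in such regions on a per-CPU list and
releases them later from preemptible context. A minimal userspace sketch of
this defer-and-drain pattern, assuming POSIX pthreads; every name in it is
illustrative, none is kernel API:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for a page waiting for __free_slab(). */
struct deferred {
	struct deferred *next;
	void *payload;
};

static pthread_spinlock_t list_lock;	/* plays the raw_spinlock_t */
static struct deferred *deferred_head;	/* plays the per-CPU free list */

/* Queueing is pointer juggling only, so the critical section stays short. */
static void defer_free(void *payload)
{
	struct deferred *d = malloc(sizeof(*d));

	d->payload = payload;
	pthread_spin_lock(&list_lock);
	d->next = deferred_head;
	deferred_head = d;
	pthread_spin_unlock(&list_lock);
}

/* Later, from a context that may block: detach everything under the lock,
 * then do the expensive freeing with the lock already dropped. */
static void drain_deferred(void)
{
	struct deferred *head;

	pthread_spin_lock(&list_lock);
	head = deferred_head;
	deferred_head = NULL;
	pthread_spin_unlock(&list_lock);

	while (head) {
		struct deferred *d = head;

		head = d->next;
		free(d->payload);
		free(d);
	}
}

int main(void)
{
	pthread_spin_init(&list_lock, PTHREAD_PROCESS_PRIVATE);
	defer_free(malloc(64));
	defer_free(malloc(64));
	drain_deferred();
	printf("drained\n");
	return 0;
}

Only pointer manipulation happens with the lock held; the expensive free
runs with the lock dropped, which is what bounds the latency on RT.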
diff --git a/mm/slab.h b/mm/slab.h
index 7b60871..afdc579 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -324,7 +324,11 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
  * The slab lists for all objects.
  */
 struct kmem_cache_node {
+#ifdef CONFIG_SLUB
+	raw_spinlock_t list_lock;
+#else
 	spinlock_t list_lock;
+#endif
 
 #ifdef CONFIG_SLAB
 	struct list_head slabs_partial;	/* partial list first, better asm code */
diff --git a/mm/slub.c b/mm/slub.c
index 65d5f92..492dbfd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1075,7 +1075,7 @@ static noinline struct kmem_cache_node *free_debug_processing(
-	spin_lock_irqsave(&n->list_lock, *flags);
+	raw_spin_lock_irqsave(&n->list_lock, *flags);
 	slab_lock(page);
 
 	if (!check_slab(s, page))
@@ -1136,7 +1136,7 @@ out:
-	spin_unlock_irqrestore(&n->list_lock, *flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, *flags);
 	slab_fix(s, "Object at 0x%p not freed", object);
@@ -1263,6 +1263,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node,
 #endif /* CONFIG_SLUB_DEBUG */
 
+struct slub_free_list {
+	raw_spinlock_t		lock;
+	struct list_head	list;
+};
+static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
+
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
@@ -1402,7 +1408,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	flags &= gfp_allowed_mask;
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (system_state == SYSTEM_RUNNING)
+#else
 	if (gfpflags_allow_blocking(flags))
+#endif
 		local_irq_enable();
 
 	flags |= s->allocflags;
@@ -1473,7 +1483,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	page->frozen = 1;
 
 out:
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (system_state == SYSTEM_RUNNING)
+#else
 	if (gfpflags_allow_blocking(flags))
+#endif
 		local_irq_disable();
@@ -1529,6 +1543,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	__free_kmem_pages(page, order);
 }
 
+static void free_delayed(struct list_head *h)
+{
+	while (!list_empty(h)) {
+		struct page *page = list_first_entry(h, struct page, lru);
+
+		list_del(&page->lru);
+		__free_slab(page->slab_cache, page);
+	}
+}
+
 #define need_reserve_slab_rcu	\
 	(sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head))
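
free_delayed() above is the drain half of that scheme: pages arrive chained
through their lru member, and each one is unlinked and handed back to
__free_slab() once freeing is safe. A compilable userspace sketch of the
same loop, using a minimal list_head and a fake_page stand-in for struct
page (all names illustrative):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal circular doubly-linked list, after the kernel's list_head. */
struct list_head {
	struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name)	{ &(name), &(name) }

static int list_empty(const struct list_head *h)
{
	return h->next == h;
}

static void list_add(struct list_head *entry, struct list_head *head)
{
	entry->next = head->next;
	entry->prev = head;
	head->next->prev = entry;
	head->next = entry;
}

static void list_del(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
}

#define list_first_entry(head, type, member) \
	((type *)((char *)(head)->next - offsetof(type, member)))

/* Stand-in for struct page, queued via its lru member. */
struct fake_page {
	int id;
	struct list_head lru;
};

/* Same shape as the kernel helper: pop, unlink, release. */
static void free_delayed(struct list_head *h)
{
	while (!list_empty(h)) {
		struct fake_page *page = list_first_entry(h, struct fake_page, lru);

		list_del(&page->lru);
		printf("freeing page %d\n", page->id);	/* __free_slab() here */
		free(page);
	}
}

int main(void)
{
	struct list_head tofree = LIST_HEAD_INIT(tofree);
	int i;

	for (i = 0; i < 3; i++) {
		struct fake_page *p = malloc(sizeof(*p));

		p->id = i;
		list_add(&p->lru, &tofree);
	}
	free_delayed(&tofree);
	return 0;
}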
@@ -1560,6 +1584,12 @@ static void free_slab(struct kmem_cache *s, struct page *page)
 		call_rcu(head, rcu_free_slab);
+	} else if (irqs_disabled()) {
+		struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
+
+		raw_spin_lock(&f->lock);
+		list_add(&page->lru, &f->list);
+		raw_spin_unlock(&f->lock);
 	} else
 		__free_slab(s, page);
 }
@@ -1673,7 +1703,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 	if (!n || !n->nr_partial)
 		return NULL;
 
-	spin_lock(&n->list_lock);
+	raw_spin_lock(&n->list_lock);
 	list_for_each_entry_safe(page, page2, &n->partial, lru) {
@@ -1698,7 +1728,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
-	spin_unlock(&n->list_lock);
+	raw_spin_unlock(&n->list_lock);
@@ -1944,7 +1974,7 @@ redo:
 			 * that acquire_slab() will see a slab page that
 			 * is frozen
 			 */
-			spin_lock(&n->list_lock);
+			raw_spin_lock(&n->list_lock);
@@ -1955,7 +1985,7 @@ redo:
 			 * slabs from diagnostic functions will not see
 			 * any frozen slabs.
 			 */
-			spin_lock(&n->list_lock);
+			raw_spin_lock(&n->list_lock);
@@ -1990,7 +2020,7 @@ redo:
 	if (lock)
-		spin_unlock(&n->list_lock);
+		raw_spin_unlock(&n->list_lock);
 
 	if (m == M_FREE) {
 		stat(s, DEACTIVATE_EMPTY);
@@ -2022,10 +2052,10 @@ static void unfreeze_partials(struct kmem_cache *s,
 		n2 = get_node(s, page_to_nid(page));
 		if (n != n2) {
 			if (n)
-				spin_unlock(&n->list_lock);
+				raw_spin_unlock(&n->list_lock);
 
 			n = n2;
-			spin_lock(&n->list_lock);
+			raw_spin_lock(&n->list_lock);
 		}
@@ -2054,7 +2084,7 @@ static void unfreeze_partials(struct kmem_cache *s,
 	if (n)
-		spin_unlock(&n->list_lock);
+		raw_spin_unlock(&n->list_lock);
 
 	while (discard_page) {
@@ -2093,14 +2123,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
 			if (drain && pobjects > s->cpu_partial) {
+				struct slub_free_list *f;
 				unsigned long flags;
+				LIST_HEAD(tofree);
 				/*
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
 				local_irq_save(flags);
 				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+				f = this_cpu_ptr(&slub_free_list);
+				raw_spin_lock(&f->lock);
+				list_splice_init(&f->list, &tofree);
+				raw_spin_unlock(&f->lock);
 				local_irq_restore(flags);
+				free_delayed(&tofree);
 				oldpage = NULL;
@@ -2172,7 +2209,22 @@ static bool has_cpu_slab(int cpu, void *info)
 
 static void flush_all(struct kmem_cache *s)
 {
+	LIST_HEAD(tofree);
+	int cpu;
+
 	on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
+	for_each_online_cpu(cpu) {
+		struct slub_free_list *f;
+
+		if (!has_cpu_slab(cpu, s))
+			continue;
+
+		f = &per_cpu(slub_free_list, cpu);
+		raw_spin_lock_irq(&f->lock);
+		list_splice_init(&f->list, &tofree);
+		raw_spin_unlock_irq(&f->lock);
+		free_delayed(&tofree);
+	}
 }
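
flush_all() now also walks every online CPU, splices that CPU's
slub_free_list onto a local tofree list while holding f->lock with
interrupts off, and frees the pages afterwards through free_delayed(). The
splice keeps the critical section constant-time: the whole chain moves with
a handful of pointer updates. A hedged approximation of list_splice_init(),
reusing the list_head helpers from the sketch above:

/* O(1) detach of an entire list, approximating the kernel's
 * list_splice_init(); the source list ends up empty again. */
static void list_splice_init(struct list_head *list, struct list_head *head)
{
	if (!list_empty(list)) {
		struct list_head *first = list->next;
		struct list_head *last = list->prev;

		first->prev = head;
		last->next = head->next;
		head->next->prev = last;
		head->next = first;
		list->next = list->prev = list;	/* reinitialise the source */
	}
}

Because the source list is re-initialised under the lock, the per-CPU list
is immediately usable again the moment the lock drops.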
@@ -2208,10 +2260,10 @@ static unsigned long count_partial(struct kmem_cache_node *n,
 
-	spin_lock_irqsave(&n->list_lock, flags);
+	raw_spin_lock_irqsave(&n->list_lock, flags);
 	list_for_each_entry(page, &n->partial, lru)
 		x += get_count(page);
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return x;
 }
 #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */
@@ -2349,8 +2401,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
 * already disabled (which is the case for bulk allocation).
 */
 static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
-			unsigned long addr, struct kmem_cache_cpu *c)
+			unsigned long addr, struct kmem_cache_cpu *c,
+			struct list_head *to_free)
 {
+	struct slub_free_list *f;
 	void *freelist;
 	struct page *page;
@@ -2410,6 +2464,13 @@ load_freelist:
 	VM_BUG_ON(!c->page->frozen);
 	c->freelist = get_freepointer(s, freelist);
 	c->tid = next_tid(c->tid);
+
+out:
+	f = this_cpu_ptr(&slub_free_list);
+	raw_spin_lock(&f->lock);
+	list_splice_init(&f->list, to_free);
+	raw_spin_unlock(&f->lock);
+
 	return freelist;
@@ -2441,7 +2502,7 @@ new_slab:
 	deactivate_slab(s, page, get_freepointer(s, freelist));
 	c->page = NULL;
 	c->freelist = NULL;
-	return freelist;
+	goto out;
 }
@@ -2453,6 +2514,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	void *p;
+	LIST_HEAD(tofree);
 	unsigned long flags;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -2464,8 +2526,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	c = this_cpu_ptr(s->cpu_slab);
 #endif
 
-	p = ___slab_alloc(s, gfpflags, node, addr, c);
+	p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
 	local_irq_restore(flags);
+	free_delayed(&tofree);
 	return p;
 }
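
The division of labour in the allocation slow path: ___slab_alloc() runs
with interrupts disabled, so it only collects, splicing the per-CPU list
into the caller-supplied to_free; __slab_alloc() drains via free_delayed()
strictly after local_irq_restore(), where blocking is permitted again. A
sketch of that contract, building on the helpers from the earlier sketches;
disable_irqs(), enable_irqs(), slow_path() and alloc_outer() are
illustrative stubs, not kernel functions:

static struct list_head cpu_deferred = LIST_HEAD_INIT(cpu_deferred);

static void disable_irqs(void) { /* stands in for local_irq_save() */ }
static void enable_irqs(void)  { /* stands in for local_irq_restore() */ }

/* Plays ___slab_alloc(): runs "IRQ-off", so it only collects pages. */
static void *slow_path(struct list_head *to_free)
{
	list_splice_init(&cpu_deferred, to_free);
	return malloc(32);
}

/* Plays __slab_alloc(): the drain happens after interrupts are back on,
 * because freeing may sleep on RT. */
static void *alloc_outer(void)
{
	struct list_head tofree = LIST_HEAD_INIT(tofree);
	void *obj;

	disable_irqs();
	obj = slow_path(&tofree);
	enable_irqs();

	free_delayed(&tofree);	/* only legal here, not in the IRQ-off region */
	return obj;
}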
@@ -2652,7 +2715,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	do {
 		if (unlikely(n)) {
-			spin_unlock_irqrestore(&n->list_lock, flags);
+			raw_spin_unlock_irqrestore(&n->list_lock, flags);
 			n = NULL;
 		}
 		prior = page->freelist;
@@ -2684,7 +2747,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 			 * Otherwise the list_lock will synchronize with
 			 * other processors updating the list of slabs.
 			 */
-			spin_lock_irqsave(&n->list_lock, flags);
+			raw_spin_lock_irqsave(&n->list_lock, flags);
@@ -2726,7 +2789,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		add_partial(n, page, DEACTIVATE_TO_TAIL);
 		stat(s, FREE_ADD_PARTIAL);
 	}
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return;
@@ -2741,7 +2804,7 @@ slab_empty:
 		remove_full(s, n, page);
 	}
 
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
@@ -2913,6 +2976,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	struct kmem_cache_cpu *c;
+	LIST_HEAD(to_free);
 	int i;
 
 	/* memcg and kmem_cache debug support */
@@ -2936,7 +3000,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			 * of re-populating per CPU c->freelist
 			 */
 			p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
-					     _RET_IP_, c);
+					     _RET_IP_, c, &to_free);
@@ -2948,6 +3012,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	c->tid = next_tid(c->tid);
 	local_irq_enable();
+	free_delayed(&to_free);
 
 	/* Clear memory outside IRQ disabled fastpath loop */
 	if (unlikely(flags & __GFP_ZERO)) {
@@ -3095,7 +3160,7 @@ static void
 init_kmem_cache_node(struct kmem_cache_node *n)
 {
 	n->nr_partial = 0;
-	spin_lock_init(&n->list_lock);
+	raw_spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_set(&n->nr_slabs, 0);
@@ -3677,7 +3742,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 		for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
 			INIT_LIST_HEAD(promote + i);
 
-		spin_lock_irqsave(&n->list_lock, flags);
+		raw_spin_lock_irqsave(&n->list_lock, flags);
 
 		/*
 		 * Build lists of slabs to discard or promote.
@@ -3708,7 +3773,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 		for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--)
 			list_splice(promote + i, &n->partial);
 
-		spin_unlock_irqrestore(&n->list_lock, flags);
+		raw_spin_unlock_irqrestore(&n->list_lock, flags);
 
 		/* Release empty slabs */
 		list_for_each_entry_safe(page, t, &discard, lru)
@@ -3884,6 +3949,12 @@ void __init kmem_cache_init(void)
 	static __initdata struct kmem_cache boot_kmem_cache,
 		boot_kmem_cache_node;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
+		INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
+	}
 
 	if (debug_guardpage_minorder())
@@ -4127,7 +4198,7 @@ static int validate_slab_node(struct kmem_cache *s,
 	unsigned long flags;
 
-	spin_lock_irqsave(&n->list_lock, flags);
+	raw_spin_lock_irqsave(&n->list_lock, flags);
 
 	list_for_each_entry(page, &n->partial, lru) {
 		validate_slab_slab(s, page, map);
@@ -4149,7 +4220,7 @@ static int validate_slab_node(struct kmem_cache *s,
 		       s->name, count, atomic_long_read(&n->nr_slabs));
 	}
 
-	spin_unlock_irqrestore(&n->list_lock, flags);
+	raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	return count;
 }
@@ -4337,12 +4408,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
 		if (!atomic_long_read(&n->nr_slabs))
 			continue;
 
-		spin_lock_irqsave(&n->list_lock, flags);
+		raw_spin_lock_irqsave(&n->list_lock, flags);
 		list_for_each_entry(page, &n->partial, lru)
 			process_slab(&t, s, page, alloc, map);
 		list_for_each_entry(page, &n->full, lru)
 			process_slab(&t, s, page, alloc, map);
-		spin_unlock_irqrestore(&n->list_lock, flags);
+		raw_spin_unlock_irqrestore(&n->list_lock, flags);
 	}
 
 	for (i = 0; i < t.count; i++) {