fs: propagate shrinker::id to list_lru
mm/list_lru.c
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_add(&lru->list, &list_lrus);
	mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
	mutex_lock(&list_lrus_mutex);
	list_del(&lru->list);
	mutex_unlock(&list_lrus_mutex);
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	/*
	 * This needs node 0 to be always present, even
	 * on systems supporting sparse numa ids.
	 */
	return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * Either lock or RCU protects the array of per cgroup lists
	 * from relocation (see memcg_update_list_lru_node).
	 */
	memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
					   lockdep_is_held(&nlru->lock));
	if (memcg_lrus && idx >= 0)
		return memcg_lrus->lru[idx];
	return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
	struct page *page;

	if (!memcg_kmem_enabled())
		return NULL;
	page = virt_to_head_page(ptr);
	return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	struct mem_cgroup *memcg;

	if (!nlru->memcg_lrus)
		return &nlru->lru;

	memcg = mem_cgroup_from_kmem(ptr);
	if (!memcg)
		return &nlru->lru;

	return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
	return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
	return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_add_tail(item, &l->list);
		l->nr_items++;
		nlru->nr_items++;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
	int nid = page_to_nid(virt_to_page(item));
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;

	spin_lock(&nlru->lock);
	if (!list_empty(item)) {
		l = list_lru_from_kmem(nlru, item);
		list_del_init(item);
		l->nr_items--;
		nlru->nr_items--;
		spin_unlock(&nlru->lock);
		return true;
	}
	spin_unlock(&nlru->lock);
	return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);
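
/*
 * Example usage (illustrative sketch, not part of this file): a cache that
 * parks reclaimable objects on a list_lru.  "struct foo_object" and "foo_lru"
 * are hypothetical names; the pattern follows how the dcache and inode caches
 * use this API.  The embedded list_head must be initialized empty before the
 * first list_lru_add(), and the object must be slab/page backed so that
 * virt_to_page() can resolve its NUMA node.
 *
 *	struct foo_object {
 *		struct list_head lru;	// INIT_LIST_HEAD() at allocation
 *		atomic_t refcount;
 *	};
 *	static struct list_lru foo_lru;
 *
 *	// last reference dropped: expose the object to the shrinker
 *	void foo_object_unused(struct foo_object *obj)
 *	{
 *		list_lru_add(&foo_lru, &obj->lru);
 *	}
 *
 *	// object referenced again (or being freed): pull it off the LRU
 *	void foo_object_used(struct foo_object *obj)
 *	{
 *		list_lru_del(&foo_lru, &obj->lru);
 *	}
 */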

void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
	list_del_init(item);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
			   struct list_head *head)
{
	list_move(item, head);
	list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);
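
/*
 * Example isolate callback (illustrative sketch, hypothetical "foo" names):
 * the walk functions below invoke the callback with the per-node lru lock
 * held.  A typical callback keeps busy objects with LRU_ROTATE or LRU_SKIP,
 * and removes idle ones with list_lru_isolate()/list_lru_isolate_move(),
 * collecting them on a private list so they can be freed after the lock is
 * dropped.
 *
 *	static enum lru_status foo_lru_isolate(struct list_head *item,
 *			struct list_lru_one *list, spinlock_t *lock, void *arg)
 *	{
 *		struct list_head *freeable = arg;
 *		struct foo_object *obj = container_of(item, struct foo_object, lru);
 *
 *		if (atomic_read(&obj->refcount))
 *			return LRU_ROTATE;	// still in use, keep walking
 *
 *		list_lru_isolate_move(list, item, freeable);
 *		return LRU_REMOVED;
 *	}
 */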

unsigned long list_lru_count_one(struct list_lru *lru,
				 int nid, struct mem_cgroup *memcg)
{
	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	unsigned long count;

	rcu_read_lock();
	l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
	count = l->nr_items;
	rcu_read_unlock();

	return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
	struct list_lru_node *nlru;

	nlru = &lru->node[nid];
	return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
		    list_lru_walk_cb isolate, void *cb_arg,
		    unsigned long *nr_to_walk)
{

	struct list_lru_node *nlru = &lru->node[nid];
	struct list_lru_one *l;
	struct list_head *item, *n;
	unsigned long isolated = 0;

	spin_lock(&nlru->lock);
	l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
	list_for_each_safe(item, n, &l->list) {
		enum lru_status ret;

		/*
		 * decrement nr_to_walk first so that we don't livelock if we
		 * get stuck on large numbers of LRU_RETRY items
		 */
		if (!*nr_to_walk)
			break;
		--*nr_to_walk;

		ret = isolate(item, l, &nlru->lock, cb_arg);
		switch (ret) {
		case LRU_REMOVED_RETRY:
			assert_spin_locked(&nlru->lock);
			/* fall through */
		case LRU_REMOVED:
			isolated++;
			nlru->nr_items--;
			/*
			 * If the lru lock has been dropped, our list
			 * traversal is now invalid and so we have to
			 * restart from scratch.
			 */
			if (ret == LRU_REMOVED_RETRY)
				goto restart;
			break;
		case LRU_ROTATE:
			list_move_tail(item, &l->list);
			break;
		case LRU_SKIP:
			break;
		case LRU_RETRY:
			/*
			 * The lru lock has been dropped, our list traversal is
			 * now invalid and so we have to restart from scratch.
			 */
			assert_spin_locked(&nlru->lock);
			goto restart;
		default:
			BUG();
		}
	}

	spin_unlock(&nlru->lock);
	return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
		  list_lru_walk_cb isolate, void *cb_arg,
		  unsigned long *nr_to_walk)
{
	return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
				   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
				 list_lru_walk_cb isolate, void *cb_arg,
				 unsigned long *nr_to_walk)
{
	long isolated = 0;
	int memcg_idx;

	isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
					nr_to_walk);
	if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
		for_each_memcg_cache_index(memcg_idx) {
			isolated += __list_lru_walk_one(lru, nid, memcg_idx,
						isolate, cb_arg, nr_to_walk);
			if (*nr_to_walk <= 0)
				break;
		}
	}
	return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);
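
/*
 * Example (sketch): the matching ->scan_objects uses list_lru_shrink_walk(),
 * which resolves to list_lru_walk_one() for the node and memcg described by
 * the shrink_control, then disposes of the isolated objects outside the lru
 * lock.  "foo_lru", "foo_lru_isolate" and "foo_dispose_list" are
 * hypothetical.
 *
 *	static unsigned long foo_shrink_scan(struct shrinker *shrink,
 *					     struct shrink_control *sc)
 *	{
 *		LIST_HEAD(freeable);
 *		unsigned long freed;
 *
 *		freed = list_lru_shrink_walk(&foo_lru, sc,
 *					     foo_lru_isolate, &freeable);
 *		foo_dispose_list(&freeable);
 *		return freed;
 *	}
 */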

static void init_one_lru(struct list_lru_one *l)
{
	INIT_LIST_HEAD(&l->list);
	l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
					  int begin, int end)
{
	int i;

	for (i = begin; i < end; i++)
		kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
				      int begin, int end)
{
	int i;

	for (i = begin; i < end; i++) {
		struct list_lru_one *l;

		l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
		if (!l)
			goto fail;

		init_one_lru(l);
		memcg_lrus->lru[i] = l;
	}
	return 0;
fail:
	__memcg_destroy_list_lru_node(memcg_lrus, begin, i);
	return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	int size = memcg_nr_cache_ids;

	memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
			      size * sizeof(void *), GFP_KERNEL);
	if (!memcg_lrus)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
		kvfree(memcg_lrus);
		return -ENOMEM;
	}
	RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

	return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
	struct list_lru_memcg *memcg_lrus;
	/*
	 * This is called when the shrinker has already been unregistered,
	 * and nobody can use it. So, there is no need to use kvfree_rcu().
	 */
	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
	__memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
	kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
	struct list_lru_memcg *mlru;

	mlru = container_of(head, struct list_lru_memcg, rcu);
	kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
				      int old_size, int new_size)
{
	struct list_lru_memcg *old, *new;

	BUG_ON(old_size > new_size);

	old = rcu_dereference_protected(nlru->memcg_lrus,
					lockdep_is_held(&list_lrus_mutex));
	new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	if (__memcg_init_list_lru_node(new, old_size, new_size)) {
		kvfree(new);
		return -ENOMEM;
	}

	memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

	/*
	 * The locking below allows readers that hold nlru->lock to avoid
	 * taking rcu_read_lock (see list_lru_from_memcg_idx).
	 *
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);
	rcu_assign_pointer(nlru->memcg_lrus, new);
	spin_unlock_irq(&nlru->lock);

	call_rcu(&old->rcu, kvfree_rcu);
	return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
					      int old_size, int new_size)
{
	struct list_lru_memcg *memcg_lrus;

	memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
					       lockdep_is_held(&list_lrus_mutex));
	/* do not bother shrinking the array back to the old size, because we
	 * cannot handle allocation failures here */
	__memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	int i;

	if (!memcg_aware)
		return 0;

	for_each_node(i) {
		if (memcg_init_list_lru_node(&lru->node[i]))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;
		memcg_destroy_list_lru_node(&lru->node[i]);
	}
	return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
				 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return 0;

	for_each_node(i) {
		if (memcg_update_list_lru_node(&lru->node[i],
					       old_size, new_size))
			goto fail;
	}
	return 0;
fail:
	for (i = i - 1; i >= 0; i--) {
		if (!lru->node[i].memcg_lrus)
			continue;

		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
	}
	return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
					 int old_size, int new_size)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_cancel_update_list_lru_node(&lru->node[i],
						  old_size, new_size);
}

int memcg_update_all_list_lrus(int new_size)
{
	int ret = 0;
	struct list_lru *lru;
	int old_size = memcg_nr_cache_ids;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list) {
		ret = memcg_update_list_lru(lru, old_size, new_size);
		if (ret)
			goto fail;
	}
out:
	mutex_unlock(&list_lrus_mutex);
	return ret;
fail:
	list_for_each_entry_continue_reverse(lru, &list_lrus, list)
		memcg_cancel_update_list_lru(lru, old_size, new_size);
	goto out;
}

static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
				      int src_idx, int dst_idx)
{
	struct list_lru_one *src, *dst;

	/*
	 * Since list_lru_{add,del} may be called under an IRQ-safe lock,
	 * we have to use IRQ-safe primitives here to avoid deadlock.
	 */
	spin_lock_irq(&nlru->lock);

	src = list_lru_from_memcg_idx(nlru, src_idx);
	dst = list_lru_from_memcg_idx(nlru, dst_idx);

	list_splice_init(&src->list, &dst->list);
	dst->nr_items += src->nr_items;
	src->nr_items = 0;

	spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
				 int src_idx, int dst_idx)
{
	int i;

	if (!list_lru_memcg_aware(lru))
		return;

	for_each_node(i)
		memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
	struct list_lru *lru;

	mutex_lock(&list_lrus_mutex);
	list_for_each_entry(lru, &list_lrus, list)
		memcg_drain_list_lru(lru, src_idx, dst_idx);
	mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
	return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

int __list_lru_init(struct list_lru *lru, bool memcg_aware,
		    struct lock_class_key *key, struct shrinker *shrinker)
{
	int i;
	size_t size = sizeof(*lru->node) * nr_node_ids;
	int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
	if (shrinker)
		lru->shrinker_id = shrinker->id;
	else
		lru->shrinker_id = -1;
#endif
	memcg_get_cache_ids();

	lru->node = kzalloc(size, GFP_KERNEL);
	if (!lru->node)
		goto out;

	for_each_node(i) {
		spin_lock_init(&lru->node[i].lock);
		if (key)
			lockdep_set_class(&lru->node[i].lock, key);
		init_one_lru(&lru->node[i].lru);
	}

	err = memcg_init_list_lru(lru, memcg_aware);
	if (err) {
		kfree(lru->node);
		/* Do this so a list_lru_destroy() doesn't crash: */
		lru->node = NULL;
		goto out;
	}

	list_lru_register(lru);
out:
	memcg_put_cache_ids();
	return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
	/* Already destroyed or not yet initialized? */
	if (!lru->node)
		return;

	memcg_get_cache_ids();

	list_lru_unregister(lru);

	memcg_destroy_list_lru(lru);
	kfree(lru->node);
	lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
	lru->shrinker_id = -1;
#endif
	memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);