Merge tag 'stream_open-5.3' of https://lab.nexedi.com/kirr/linux
[linux-2.6-block.git] / mm / list_lru.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
4  * Authors: David Chinner and Glauber Costa
5  *
6  * Generic LRU infrastructure
7  */
8 #include <linux/kernel.h>
9 #include <linux/module.h>
10 #include <linux/mm.h>
11 #include <linux/list_lru.h>
12 #include <linux/slab.h>
13 #include <linux/mutex.h>
14 #include <linux/memcontrol.h>
15 #include "slab.h"
16
17 #ifdef CONFIG_MEMCG_KMEM
18 static LIST_HEAD(list_lrus);
19 static DEFINE_MUTEX(list_lrus_mutex);
20
21 static void list_lru_register(struct list_lru *lru)
22 {
23         mutex_lock(&list_lrus_mutex);
24         list_add(&lru->list, &list_lrus);
25         mutex_unlock(&list_lrus_mutex);
26 }
27
28 static void list_lru_unregister(struct list_lru *lru)
29 {
30         mutex_lock(&list_lrus_mutex);
31         list_del(&lru->list);
32         mutex_unlock(&list_lrus_mutex);
33 }
34
35 static int lru_shrinker_id(struct list_lru *lru)
36 {
37         return lru->shrinker_id;
38 }
39
40 static inline bool list_lru_memcg_aware(struct list_lru *lru)
41 {
42         return lru->memcg_aware;
43 }
44
45 static inline struct list_lru_one *
46 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
47 {
48         struct list_lru_memcg *memcg_lrus;
49         /*
50          * Either lock or RCU protects the array of per cgroup lists
51          * from relocation (see memcg_update_list_lru_node).
52          */
53         memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
54                                            lockdep_is_held(&nlru->lock));
55         if (memcg_lrus && idx >= 0)
56                 return memcg_lrus->lru[idx];
57         return &nlru->lru;
58 }
59
60 static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
61 {
62         struct page *page;
63
64         if (!memcg_kmem_enabled())
65                 return NULL;
66         page = virt_to_head_page(ptr);
67         return memcg_from_slab_page(page);
68 }
69
70 static inline struct list_lru_one *
71 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
72                    struct mem_cgroup **memcg_ptr)
73 {
74         struct list_lru_one *l = &nlru->lru;
75         struct mem_cgroup *memcg = NULL;
76
77         if (!nlru->memcg_lrus)
78                 goto out;
79
80         memcg = mem_cgroup_from_kmem(ptr);
81         if (!memcg)
82                 goto out;
83
84         l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
85 out:
86         if (memcg_ptr)
87                 *memcg_ptr = memcg;
88         return l;
89 }
90 #else
91 static void list_lru_register(struct list_lru *lru)
92 {
93 }
94
95 static void list_lru_unregister(struct list_lru *lru)
96 {
97 }
98
99 static int lru_shrinker_id(struct list_lru *lru)
100 {
101         return -1;
102 }
103
104 static inline bool list_lru_memcg_aware(struct list_lru *lru)
105 {
106         return false;
107 }
108
109 static inline struct list_lru_one *
110 list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
111 {
112         return &nlru->lru;
113 }
114
115 static inline struct list_lru_one *
116 list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
117                    struct mem_cgroup **memcg_ptr)
118 {
119         if (memcg_ptr)
120                 *memcg_ptr = NULL;
121         return &nlru->lru;
122 }
123 #endif /* CONFIG_MEMCG_KMEM */
124
125 bool list_lru_add(struct list_lru *lru, struct list_head *item)
126 {
127         int nid = page_to_nid(virt_to_page(item));
128         struct list_lru_node *nlru = &lru->node[nid];
129         struct mem_cgroup *memcg;
130         struct list_lru_one *l;
131
132         spin_lock(&nlru->lock);
133         if (list_empty(item)) {
134                 l = list_lru_from_kmem(nlru, item, &memcg);
135                 list_add_tail(item, &l->list);
136                 /* Set shrinker bit if the first element was added */
137                 if (!l->nr_items++)
138                         memcg_set_shrinker_bit(memcg, nid,
139                                                lru_shrinker_id(lru));
140                 nlru->nr_items++;
141                 spin_unlock(&nlru->lock);
142                 return true;
143         }
144         spin_unlock(&nlru->lock);
145         return false;
146 }
147 EXPORT_SYMBOL_GPL(list_lru_add);
148
149 bool list_lru_del(struct list_lru *lru, struct list_head *item)
150 {
151         int nid = page_to_nid(virt_to_page(item));
152         struct list_lru_node *nlru = &lru->node[nid];
153         struct list_lru_one *l;
154
155         spin_lock(&nlru->lock);
156         if (!list_empty(item)) {
157                 l = list_lru_from_kmem(nlru, item, NULL);
158                 list_del_init(item);
159                 l->nr_items--;
160                 nlru->nr_items--;
161                 spin_unlock(&nlru->lock);
162                 return true;
163         }
164         spin_unlock(&nlru->lock);
165         return false;
166 }
167 EXPORT_SYMBOL_GPL(list_lru_del);
168
169 void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
170 {
171         list_del_init(item);
172         list->nr_items--;
173 }
174 EXPORT_SYMBOL_GPL(list_lru_isolate);
175
176 void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
177                            struct list_head *head)
178 {
179         list_move(item, head);
180         list->nr_items--;
181 }
182 EXPORT_SYMBOL_GPL(list_lru_isolate_move);
183
184 unsigned long list_lru_count_one(struct list_lru *lru,
185                                  int nid, struct mem_cgroup *memcg)
186 {
187         struct list_lru_node *nlru = &lru->node[nid];
188         struct list_lru_one *l;
189         unsigned long count;
190
191         rcu_read_lock();
192         l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
193         count = l->nr_items;
194         rcu_read_unlock();
195
196         return count;
197 }
198 EXPORT_SYMBOL_GPL(list_lru_count_one);
199
200 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
201 {
202         struct list_lru_node *nlru;
203
204         nlru = &lru->node[nid];
205         return nlru->nr_items;
206 }
207 EXPORT_SYMBOL_GPL(list_lru_count_node);
208
209 static unsigned long
210 __list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
211                     list_lru_walk_cb isolate, void *cb_arg,
212                     unsigned long *nr_to_walk)
213 {
214
215         struct list_lru_one *l;
216         struct list_head *item, *n;
217         unsigned long isolated = 0;
218
219         l = list_lru_from_memcg_idx(nlru, memcg_idx);
220 restart:
221         list_for_each_safe(item, n, &l->list) {
222                 enum lru_status ret;
223
224                 /*
225                  * decrement nr_to_walk first so that we don't livelock if we
226                  * get stuck on large numbesr of LRU_RETRY items
227                  */
228                 if (!*nr_to_walk)
229                         break;
230                 --*nr_to_walk;
231
232                 ret = isolate(item, l, &nlru->lock, cb_arg);
233                 switch (ret) {
234                 case LRU_REMOVED_RETRY:
235                         assert_spin_locked(&nlru->lock);
236                         /* fall through */
237                 case LRU_REMOVED:
238                         isolated++;
239                         nlru->nr_items--;
240                         /*
241                          * If the lru lock has been dropped, our list
242                          * traversal is now invalid and so we have to
243                          * restart from scratch.
244                          */
245                         if (ret == LRU_REMOVED_RETRY)
246                                 goto restart;
247                         break;
248                 case LRU_ROTATE:
249                         list_move_tail(item, &l->list);
250                         break;
251                 case LRU_SKIP:
252                         break;
253                 case LRU_RETRY:
254                         /*
255                          * The lru lock has been dropped, our list traversal is
256                          * now invalid and so we have to restart from scratch.
257                          */
258                         assert_spin_locked(&nlru->lock);
259                         goto restart;
260                 default:
261                         BUG();
262                 }
263         }
264         return isolated;
265 }
266
267 unsigned long
268 list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
269                   list_lru_walk_cb isolate, void *cb_arg,
270                   unsigned long *nr_to_walk)
271 {
272         struct list_lru_node *nlru = &lru->node[nid];
273         unsigned long ret;
274
275         spin_lock(&nlru->lock);
276         ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
277                                   nr_to_walk);
278         spin_unlock(&nlru->lock);
279         return ret;
280 }
281 EXPORT_SYMBOL_GPL(list_lru_walk_one);
282
283 unsigned long
284 list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
285                       list_lru_walk_cb isolate, void *cb_arg,
286                       unsigned long *nr_to_walk)
287 {
288         struct list_lru_node *nlru = &lru->node[nid];
289         unsigned long ret;
290
291         spin_lock_irq(&nlru->lock);
292         ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
293                                   nr_to_walk);
294         spin_unlock_irq(&nlru->lock);
295         return ret;
296 }
297
298 unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
299                                  list_lru_walk_cb isolate, void *cb_arg,
300                                  unsigned long *nr_to_walk)
301 {
302         long isolated = 0;
303         int memcg_idx;
304
305         isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
306                                       nr_to_walk);
307         if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
308                 for_each_memcg_cache_index(memcg_idx) {
309                         struct list_lru_node *nlru = &lru->node[nid];
310
311                         spin_lock(&nlru->lock);
312                         isolated += __list_lru_walk_one(nlru, memcg_idx,
313                                                         isolate, cb_arg,
314                                                         nr_to_walk);
315                         spin_unlock(&nlru->lock);
316
317                         if (*nr_to_walk <= 0)
318                                 break;
319                 }
320         }
321         return isolated;
322 }
323 EXPORT_SYMBOL_GPL(list_lru_walk_node);
324
325 static void init_one_lru(struct list_lru_one *l)
326 {
327         INIT_LIST_HEAD(&l->list);
328         l->nr_items = 0;
329 }
330
331 #ifdef CONFIG_MEMCG_KMEM
332 static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
333                                           int begin, int end)
334 {
335         int i;
336
337         for (i = begin; i < end; i++)
338                 kfree(memcg_lrus->lru[i]);
339 }
340
341 static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
342                                       int begin, int end)
343 {
344         int i;
345
346         for (i = begin; i < end; i++) {
347                 struct list_lru_one *l;
348
349                 l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
350                 if (!l)
351                         goto fail;
352
353                 init_one_lru(l);
354                 memcg_lrus->lru[i] = l;
355         }
356         return 0;
357 fail:
358         __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
359         return -ENOMEM;
360 }
361
362 static int memcg_init_list_lru_node(struct list_lru_node *nlru)
363 {
364         struct list_lru_memcg *memcg_lrus;
365         int size = memcg_nr_cache_ids;
366
367         memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
368                               size * sizeof(void *), GFP_KERNEL);
369         if (!memcg_lrus)
370                 return -ENOMEM;
371
372         if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
373                 kvfree(memcg_lrus);
374                 return -ENOMEM;
375         }
376         RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);
377
378         return 0;
379 }
380
381 static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
382 {
383         struct list_lru_memcg *memcg_lrus;
384         /*
385          * This is called when shrinker has already been unregistered,
386          * and nobody can use it. So, there is no need to use kvfree_rcu().
387          */
388         memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
389         __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
390         kvfree(memcg_lrus);
391 }
392
393 static void kvfree_rcu(struct rcu_head *head)
394 {
395         struct list_lru_memcg *mlru;
396
397         mlru = container_of(head, struct list_lru_memcg, rcu);
398         kvfree(mlru);
399 }
400
401 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
402                                       int old_size, int new_size)
403 {
404         struct list_lru_memcg *old, *new;
405
406         BUG_ON(old_size > new_size);
407
408         old = rcu_dereference_protected(nlru->memcg_lrus,
409                                         lockdep_is_held(&list_lrus_mutex));
410         new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
411         if (!new)
412                 return -ENOMEM;
413
414         if (__memcg_init_list_lru_node(new, old_size, new_size)) {
415                 kvfree(new);
416                 return -ENOMEM;
417         }
418
419         memcpy(&new->lru, &old->lru, old_size * sizeof(void *));
420
421         /*
422          * The locking below allows readers that hold nlru->lock avoid taking
423          * rcu_read_lock (see list_lru_from_memcg_idx).
424          *
425          * Since list_lru_{add,del} may be called under an IRQ-safe lock,
426          * we have to use IRQ-safe primitives here to avoid deadlock.
427          */
428         spin_lock_irq(&nlru->lock);
429         rcu_assign_pointer(nlru->memcg_lrus, new);
430         spin_unlock_irq(&nlru->lock);
431
432         call_rcu(&old->rcu, kvfree_rcu);
433         return 0;
434 }
435
436 static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
437                                               int old_size, int new_size)
438 {
439         struct list_lru_memcg *memcg_lrus;
440
441         memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
442                                                lockdep_is_held(&list_lrus_mutex));
443         /* do not bother shrinking the array back to the old size, because we
444          * cannot handle allocation failures here */
445         __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
446 }
447
448 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
449 {
450         int i;
451
452         lru->memcg_aware = memcg_aware;
453
454         if (!memcg_aware)
455                 return 0;
456
457         for_each_node(i) {
458                 if (memcg_init_list_lru_node(&lru->node[i]))
459                         goto fail;
460         }
461         return 0;
462 fail:
463         for (i = i - 1; i >= 0; i--) {
464                 if (!lru->node[i].memcg_lrus)
465                         continue;
466                 memcg_destroy_list_lru_node(&lru->node[i]);
467         }
468         return -ENOMEM;
469 }
470
471 static void memcg_destroy_list_lru(struct list_lru *lru)
472 {
473         int i;
474
475         if (!list_lru_memcg_aware(lru))
476                 return;
477
478         for_each_node(i)
479                 memcg_destroy_list_lru_node(&lru->node[i]);
480 }
481
482 static int memcg_update_list_lru(struct list_lru *lru,
483                                  int old_size, int new_size)
484 {
485         int i;
486
487         if (!list_lru_memcg_aware(lru))
488                 return 0;
489
490         for_each_node(i) {
491                 if (memcg_update_list_lru_node(&lru->node[i],
492                                                old_size, new_size))
493                         goto fail;
494         }
495         return 0;
496 fail:
497         for (i = i - 1; i >= 0; i--) {
498                 if (!lru->node[i].memcg_lrus)
499                         continue;
500
501                 memcg_cancel_update_list_lru_node(&lru->node[i],
502                                                   old_size, new_size);
503         }
504         return -ENOMEM;
505 }
506
507 static void memcg_cancel_update_list_lru(struct list_lru *lru,
508                                          int old_size, int new_size)
509 {
510         int i;
511
512         if (!list_lru_memcg_aware(lru))
513                 return;
514
515         for_each_node(i)
516                 memcg_cancel_update_list_lru_node(&lru->node[i],
517                                                   old_size, new_size);
518 }
519
520 int memcg_update_all_list_lrus(int new_size)
521 {
522         int ret = 0;
523         struct list_lru *lru;
524         int old_size = memcg_nr_cache_ids;
525
526         mutex_lock(&list_lrus_mutex);
527         list_for_each_entry(lru, &list_lrus, list) {
528                 ret = memcg_update_list_lru(lru, old_size, new_size);
529                 if (ret)
530                         goto fail;
531         }
532 out:
533         mutex_unlock(&list_lrus_mutex);
534         return ret;
535 fail:
536         list_for_each_entry_continue_reverse(lru, &list_lrus, list)
537                 memcg_cancel_update_list_lru(lru, old_size, new_size);
538         goto out;
539 }
540
541 static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
542                                       int src_idx, struct mem_cgroup *dst_memcg)
543 {
544         struct list_lru_node *nlru = &lru->node[nid];
545         int dst_idx = dst_memcg->kmemcg_id;
546         struct list_lru_one *src, *dst;
547         bool set;
548
549         /*
550          * Since list_lru_{add,del} may be called under an IRQ-safe lock,
551          * we have to use IRQ-safe primitives here to avoid deadlock.
552          */
553         spin_lock_irq(&nlru->lock);
554
555         src = list_lru_from_memcg_idx(nlru, src_idx);
556         dst = list_lru_from_memcg_idx(nlru, dst_idx);
557
558         list_splice_init(&src->list, &dst->list);
559         set = (!dst->nr_items && src->nr_items);
560         dst->nr_items += src->nr_items;
561         if (set)
562                 memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
563         src->nr_items = 0;
564
565         spin_unlock_irq(&nlru->lock);
566 }
567
568 static void memcg_drain_list_lru(struct list_lru *lru,
569                                  int src_idx, struct mem_cgroup *dst_memcg)
570 {
571         int i;
572
573         if (!list_lru_memcg_aware(lru))
574                 return;
575
576         for_each_node(i)
577                 memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
578 }
579
580 void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
581 {
582         struct list_lru *lru;
583
584         mutex_lock(&list_lrus_mutex);
585         list_for_each_entry(lru, &list_lrus, list)
586                 memcg_drain_list_lru(lru, src_idx, dst_memcg);
587         mutex_unlock(&list_lrus_mutex);
588 }
589 #else
590 static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
591 {
592         return 0;
593 }
594
595 static void memcg_destroy_list_lru(struct list_lru *lru)
596 {
597 }
598 #endif /* CONFIG_MEMCG_KMEM */
599
600 int __list_lru_init(struct list_lru *lru, bool memcg_aware,
601                     struct lock_class_key *key, struct shrinker *shrinker)
602 {
603         int i;
604         int err = -ENOMEM;
605
606 #ifdef CONFIG_MEMCG_KMEM
607         if (shrinker)
608                 lru->shrinker_id = shrinker->id;
609         else
610                 lru->shrinker_id = -1;
611 #endif
612         memcg_get_cache_ids();
613
614         lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
615         if (!lru->node)
616                 goto out;
617
618         for_each_node(i) {
619                 spin_lock_init(&lru->node[i].lock);
620                 if (key)
621                         lockdep_set_class(&lru->node[i].lock, key);
622                 init_one_lru(&lru->node[i].lru);
623         }
624
625         err = memcg_init_list_lru(lru, memcg_aware);
626         if (err) {
627                 kfree(lru->node);
628                 /* Do this so a list_lru_destroy() doesn't crash: */
629                 lru->node = NULL;
630                 goto out;
631         }
632
633         list_lru_register(lru);
634 out:
635         memcg_put_cache_ids();
636         return err;
637 }
638 EXPORT_SYMBOL_GPL(__list_lru_init);
639
640 void list_lru_destroy(struct list_lru *lru)
641 {
642         /* Already destroyed or not yet initialized? */
643         if (!lru->node)
644                 return;
645
646         memcg_get_cache_ids();
647
648         list_lru_unregister(lru);
649
650         memcg_destroy_list_lru(lru);
651         kfree(lru->node);
652         lru->node = NULL;
653
654 #ifdef CONFIG_MEMCG_KMEM
655         lru->shrinker_id = -1;
656 #endif
657         memcg_put_cache_ids();
658 }
659 EXPORT_SYMBOL_GPL(list_lru_destroy);