// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>
#include "slab.h"
#include "internal.h"

#ifdef CONFIG_MEMCG
static LIST_HEAD(memcg_list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return lru->memcg_aware;
}

static void list_lru_register(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &memcg_list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        if (!list_lru_memcg_aware(lru))
                return;

        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        if (list_lru_memcg_aware(lru) && idx >= 0) {
                struct list_lru_memcg *mlru = xa_load(&lru->xa, idx);

                return mlru ? &mlru->node[nid] : NULL;
        }
        return &lru->node[nid].lru;
}

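/*
 * Lock a list_lru_one and check that it is still alive.  A list that has
 * been reparented away is marked dead by setting nr_items to LONG_MIN;
 * in that case the lock is dropped again and false is returned.
 */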
static inline bool lock_list_lru(struct list_lru_one *l, bool irq)
{
        if (irq)
                spin_lock_irq(&l->lock);
        else
                spin_lock(&l->lock);
        if (unlikely(READ_ONCE(l->nr_items) == LONG_MIN)) {
                if (irq)
                        spin_unlock_irq(&l->lock);
                else
                        spin_unlock(&l->lock);
                return false;
        }
        return true;
}

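/*
 * Find and lock the list_lru_one for @memcg on @nid.  If the list has
 * already been reparented (or was never allocated), either return NULL
 * when @skip_empty is set, or retry against the parent memcg.  On
 * success the list is returned with its lock held.
 */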
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l;

        rcu_read_lock();
again:
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        if (likely(l) && lock_list_lru(l, irq)) {
                rcu_read_unlock();
                return l;
        }
        /*
         * The caller may either simply bail out if it raced with
         * reparenting, or iterate through the list_lru and expect
         * to find empty slots.
         */
        if (skip_empty) {
                rcu_read_unlock();
                return NULL;
        }
        VM_WARN_ON(!css_is_dying(&memcg->css));
        memcg = parent_mem_cgroup(memcg);
        goto again;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
{
        return &lru->node[nid].lru;
}

static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                       bool irq, bool skip_empty)
{
        struct list_lru_one *l = &lru->node[nid].lru;

        if (irq)
                spin_lock_irq(&l->lock);
        else
                spin_lock(&l->lock);

        return l;
}

static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
{
        if (irq_off)
                spin_unlock_irq(&l->lock);
        else
                spin_unlock(&l->lock);
}
#endif /* CONFIG_MEMCG */

/* The caller must ensure the memcg lifetime. */
bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (list_empty(item)) {
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
                unlock_list_lru(l, false);
                atomic_long_inc(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

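/*
 * list_lru_add_obj: convenience wrapper around list_lru_add() that derives
 * the node id and the owning memcg from the object itself.  The RCU read
 * lock is held around the memcg lookup to keep the memcg alive.
 */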
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_add(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_add_obj);

/* The caller must ensure the memcg lifetime. */
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
                  struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
        if (!l)
                return false;
        if (!list_empty(item)) {
                list_del_init(item);
                l->nr_items--;
                unlock_list_lru(l, false);
                atomic_long_dec(&nlru->nr_items);
                return true;
        }
        unlock_list_lru(l, false);
        return false;
}

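/*
 * list_lru_del_obj: counterpart of list_lru_add_obj(); derives the node id
 * and the owning memcg from the object before calling list_lru_del().
 */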
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
{
        bool ret;
        int nid = page_to_nid(virt_to_page(item));

        if (list_lru_memcg_aware(lru)) {
                rcu_read_lock();
                ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item));
                rcu_read_unlock();
        } else {
                ret = list_lru_del(lru, item, nid, NULL);
        }

        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_del_obj);

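/*
 * Remove @item from @list during a walk.  The caller already holds the
 * list lock, so only the per-list item count needs updating; the per-node
 * counter is handled by the walk code.
 */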
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

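/*
 * Like list_lru_isolate(), but move @item onto the caller-provided @head
 * instead of deleting it from the list.
 */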
void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

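/*
 * Return the number of items on the list for a given node and memcg.
 * The count is read locklessly and may be stale; a negative (dead-list)
 * value is reported as zero.
 */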
unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_one *l;
        long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
        count = l ? READ_ONCE(l->nr_items) : 0;
        rcu_read_unlock();

        if (unlikely(count < 0))
                count = 0;

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return atomic_long_read(&nlru->nr_items);
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

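/*
 * Walk one per-memcg (or the global) list, calling @isolate on each item
 * until @nr_to_walk items have been scanned or the callback stops the
 * walk.  Returns the number of items removed by the callback.
 */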
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk, bool irq_off)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l = NULL;
        struct list_head *item, *n;
        unsigned long isolated = 0;

restart:
        l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
        if (!l)
                return isolated;
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, cb_arg);
                switch (ret) {
                /*
                 * LRU_RETRY, LRU_REMOVED_RETRY and LRU_STOP will drop the lru
                 * lock. List traversal will have to restart from scratch.
                 */
                case LRU_RETRY:
                        goto restart;
                case LRU_REMOVED_RETRY:
                        fallthrough;
                case LRU_REMOVED:
                        isolated++;
                        atomic_long_dec(&nlru->nr_items);
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_STOP:
                        goto out;
                default:
                        BUG();
                }
        }
        unlock_list_lru(l, irq_off);
out:
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, false);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg, isolate,
                                   cb_arg, nr_to_walk, true);
}

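/*
 * Walk every list on @nid: first the memcg-unaware (global) list, then,
 * for memcg-aware lrus, the list of each memcg that has one allocated.
 */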
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);

#ifdef CONFIG_MEMCG
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                struct list_lru_memcg *mlru;
                struct mem_cgroup *memcg;
                unsigned long index;

                xa_for_each(&lru->xa, index, mlru) {
                        rcu_read_lock();
                        memcg = mem_cgroup_from_id(index);
                        if (!mem_cgroup_tryget(memcg)) {
                                rcu_read_unlock();
                                continue;
                        }
                        rcu_read_unlock();
                        isolated += __list_lru_walk_one(lru, nid, memcg,
                                                        isolate, cb_arg,
                                                        nr_to_walk, false);
                        mem_cgroup_put(memcg);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
#endif

        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru *lru, struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        spin_lock_init(&l->lock);
        l->nr_items = 0;
#ifdef CONFIG_LOCKDEP
        if (lru->key)
                lockdep_set_class(&l->lock, lru->key);
#endif
}

#ifdef CONFIG_MEMCG
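/* Allocate a list_lru_memcg with one initialised list_lru_one per node. */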
static struct list_lru_memcg *memcg_init_list_lru_one(struct list_lru *lru, gfp_t gfp)
{
        int nid;
        struct list_lru_memcg *mlru;

        mlru = kmalloc(struct_size(mlru, node, nr_node_ids), gfp);
        if (!mlru)
                return NULL;

        for_each_node(nid)
                init_one_lru(lru, &mlru->node[nid]);

        return mlru;
}

static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        if (memcg_aware)
                xa_init_flags(&lru->xa, XA_FLAGS_LOCK_IRQ);
        lru->memcg_aware = memcg_aware;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        XA_STATE(xas, &lru->xa, 0);
        struct list_lru_memcg *mlru;

        if (!list_lru_memcg_aware(lru))
                return;

        xas_lock_irq(&xas);
        xas_for_each(&xas, mlru, ULONG_MAX) {
                kfree(mlru);
                xas_store(&xas, NULL);
        }
        xas_unlock_irq(&xas);
}

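/*
 * Splice all items from @src into the list of @dst_memcg on @nid and mark
 * @src dead (nr_items = LONG_MIN) so that racing lockers back off.
 */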
static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
                                        struct list_lru_one *src,
                                        struct mem_cgroup *dst_memcg)
{
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *dst;

        spin_lock_irq(&src->lock);
        dst = list_lru_from_memcg_idx(lru, nid, dst_idx);
        spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);

        list_splice_init(&src->list, &dst->list);
        if (src->nr_items) {
                WARN_ON(src->nr_items < 0);
                dst->nr_items += src->nr_items;
                set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        }
        /* Mark the list_lru_one dead */
        src->nr_items = LONG_MIN;

        spin_unlock(&dst->lock);
        spin_unlock_irq(&src->lock);
}

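/*
 * Reparent every list_lru entry owned by @memcg to @parent and free the
 * now-unused list_lru_memcg structures.
 */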
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent)
{
        struct list_lru *lru;
        int i;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &memcg_list_lrus, list) {
                struct list_lru_memcg *mlru;
                XA_STATE(xas, &lru->xa, memcg->kmemcg_id);

                /*
                 * Lock the XArray to ensure that no list_lru_memcg allocation
                 * is ongoing and that any further allocation will see
                 * css_is_dying().
                 */
                xas_lock_irq(&xas);
                mlru = xas_store(&xas, NULL);
                xas_unlock_irq(&xas);
                if (!mlru)
                        continue;

                /*
                 * With the XArray entry set to NULL, holding the lru lock
                 * below prevents list_lru_{add,del,isolate} from touching
                 * the lru, so it is safe to reparent.
                 */
                for_each_node(i)
                        memcg_reparent_list_lru_one(lru, i, &mlru->node[i], parent);

                /*
                 * Here all list_lrus corresponding to the cgroup are
                 * guaranteed to remain empty, so we can safely free this
                 * lru; any further memcg_list_lru_alloc() call will simply
                 * bail out.
                 */
                kvfree_rcu(mlru, rcu);
        }
        mutex_unlock(&list_lrus_mutex);
}

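/* Check whether @memcg already has a list_lru_memcg allocated on @lru. */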
static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
                                            struct list_lru *lru)
{
        int idx = memcg->kmemcg_id;

        return idx < 0 || xa_load(&lru->xa, idx);
}

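/*
 * Make sure @memcg and all of its ancestors have a list_lru_memcg
 * allocated on @lru, so that items can be added for this memcg and later
 * reparented.  Returns 0 on success or a negative errno.
 */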
int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
                         gfp_t gfp)
{
        unsigned long flags;
        struct list_lru_memcg *mlru = NULL;
        struct mem_cgroup *pos, *parent;
        XA_STATE(xas, &lru->xa, 0);

        if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
                return 0;

        gfp &= GFP_RECLAIM_MASK;
        /*
         * Because the list_lru can be reparented to the parent cgroup's
         * list_lru, we must make sure that this cgroup and all its
         * ancestors have allocated a list_lru_memcg.
         */
        do {
                /*
                 * Keep finding the farthest ancestor that has not been
                 * populated yet, until we reach memcg itself.
                 */
                pos = memcg;
                parent = parent_mem_cgroup(pos);
                while (!memcg_list_lru_allocated(parent, lru)) {
                        pos = parent;
                        parent = parent_mem_cgroup(pos);
                }

                if (!mlru) {
                        mlru = memcg_init_list_lru_one(lru, gfp);
                        if (!mlru)
                                return -ENOMEM;
                }
                xas_set(&xas, pos->kmemcg_id);
                do {
                        xas_lock_irqsave(&xas, flags);
                        if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
                                xas_store(&xas, mlru);
                                if (!xas_error(&xas))
                                        mlru = NULL;
                        }
                        xas_unlock_irqrestore(&xas, flags);
                } while (xas_nomem(&xas, gfp));
        } while (pos != memcg && !css_is_dying(&pos->css));

        if (unlikely(mlru))
                kfree(mlru);

        return xas_error(&xas);
}
#else
static inline void memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG */

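/*
 * Initialise @lru: allocate the per-node lists and, for memcg-aware lrus,
 * set up the per-memcg XArray and register the lru for reparenting.
 */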
int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker)
{
        int i;

#ifdef CONFIG_MEMCG
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;

        if (mem_cgroup_kmem_disabled())
                memcg_aware = false;
#endif

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                return -ENOMEM;

        for_each_node(i)
                init_one_lru(lru, &lru->node[i].lru);

        memcg_init_list_lru(lru, memcg_aware);
        list_lru_register(lru);

        return 0;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG
        lru->shrinker_id = -1;
#endif
}
EXPORT_SYMBOL_GPL(list_lru_destroy);