/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
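
/*
 * A minimal usage sketch (illustrative only, not part of this file): the
 * owner of a cache embeds a struct list_lru, sets it up with list_lru_init()
 * or list_lru_init_memcg(), links objects in and out with
 * list_lru_add()/list_lru_del(), and reclaims through list_lru_walk() (or the
 * *_one/*_node variants below) using a caller-supplied callback such as the
 * hypothetical one here:
 *
 *	static enum lru_status demo_isolate(struct list_head *item,
 *			struct list_lru_one *list, spinlock_t *lock, void *arg)
 *	{
 *		list_lru_isolate(list, item);
 *		return LRU_REMOVED;
 *	}
 *
 * Real users (e.g. the dentry and inode caches) follow this pattern.
 */
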
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return lru->shrinker_id;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        /*
         * This needs node 0 to always be present, even on
         * systems supporting sparse NUMA ids.
         */
        return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * Either the lock or RCU protects the array of per-cgroup lists
         * from relocation (see memcg_update_list_lru_node).
         */
        memcg_lrus = rcu_dereference_check(nlru->memcg_lrus,
                                           lockdep_is_held(&nlru->lock));
        if (memcg_lrus && idx >= 0)
                return memcg_lrus->lru[idx];
        return &nlru->lru;
}

static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr)
{
        struct page *page;

        if (!memcg_kmem_enabled())
                return NULL;
        page = virt_to_head_page(ptr);
        return page->mem_cgroup;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        struct list_lru_one *l = &nlru->lru;
        struct mem_cgroup *memcg = NULL;

        if (!nlru->memcg_lrus)
                goto out;

        memcg = mem_cgroup_from_kmem(ptr);
        if (!memcg)
                goto out;

        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
out:
        if (memcg_ptr)
                *memcg_ptr = memcg;
        return l;
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}

static int lru_shrinker_id(struct list_lru *lru)
{
        return -1;
}

static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr,
                   struct mem_cgroup **memcg_ptr)
{
        if (memcg_ptr)
                *memcg_ptr = NULL;
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

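/*
 * list_lru_add: add an item to the per-node (and, for memcg-aware lrus,
 * per-memcg) list it belongs to.
 *
 * Returns true if the item was queued, false if it was already on a list.
 * When the first item is added on behalf of a memcg, the corresponding
 * shrinker bit is set so that reclaim knows this lru has work to do.
 */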
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct mem_cgroup *memcg;
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, &memcg);
                list_add_tail(item, &l->list);
                /* Set shrinker bit if the first element was added */
                if (!l->nr_items++)
                        memcg_set_shrinker_bit(memcg, nid,
                                               lru_shrinker_id(lru));
                nlru->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

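/*
 * list_lru_del: remove an item from its lru list.
 *
 * Returns true if the item was on a list and has been removed, false if it
 * was already unlinked.  The per-list and per-node item counts are updated
 * accordingly.
 */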
bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item, NULL);
                list_del_init(item);
                l->nr_items--;
                nlru->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

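/*
 * list_lru_isolate() and list_lru_isolate_move() are meant to be called from
 * a walk callback with the node's lru lock held: they unlink (or move) the
 * item and adjust the per-list count, but deliberately leave nlru->nr_items
 * to the walker, which accounts for removed items itself.
 */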
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

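/*
 * list_lru_count_one: return the number of items on one node/memcg list.
 * The count is read under RCU and is therefore only a snapshot.
 * list_lru_count_node() below returns the total for a whole node.
 */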
unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        rcu_read_lock();
        l = list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
        count = l->nr_items;
        rcu_read_unlock();

        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        struct list_lru_node *nlru;

        nlru = &lru->node[nid];
        return nlru->nr_items;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

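/*
 * Walk one per-memcg (or the global) list of a node, calling the isolate
 * callback on each item until *nr_to_walk is exhausted.  The caller must
 * hold nlru->lock; callbacks may drop and reacquire it, in which case the
 * traversal restarts from the head of the list.
 */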
static unsigned long
__list_lru_walk_one(struct list_lru_node *nlru, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * Decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items.
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        /* fall through */
                case LRU_REMOVED:
                        isolated++;
                        nlru->nr_items--;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock(&nlru->lock);
        return ret;
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long
list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                      list_lru_walk_cb isolate, void *cb_arg,
                      unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        unsigned long ret;

        spin_lock_irq(&nlru->lock);
        ret = __list_lru_walk_one(nlru, memcg_cache_id(memcg), isolate, cb_arg,
                                  nr_to_walk);
        spin_unlock_irq(&nlru->lock);
        return ret;
}

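/*
 * list_lru_walk_node: walk the global list of a node and then, for
 * memcg-aware lrus, every per-memcg list on that node, until *nr_to_walk
 * items have been scanned.  Returns the number of items isolated.
 */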
unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += list_lru_walk_one(lru, nid, NULL, isolate, cb_arg,
                                      nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        struct list_lru_node *nlru = &lru->node[nid];

                        spin_lock(&nlru->lock);
                        isolated += __list_lru_walk_one(nlru, memcg_idx,
                                                        isolate, cb_arg,
                                                        nr_to_walk);
                        spin_unlock(&nlru->lock);

                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        /* free only the lists [begin, i) that were actually allocated */
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        int size = memcg_nr_cache_ids;

        memcg_lrus = kvmalloc(sizeof(*memcg_lrus) +
                              size * sizeof(void *), GFP_KERNEL);
        if (!memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(memcg_lrus, 0, size)) {
                kvfree(memcg_lrus);
                return -ENOMEM;
        }
        RCU_INIT_POINTER(nlru->memcg_lrus, memcg_lrus);

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when the shrinker has already been unregistered
         * and nobody can use it anymore, so there is no need to use
         * kvfree_rcu().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
}

static void kvfree_rcu(struct rcu_head *head)
{
        struct list_lru_memcg *mlru;

        mlru = container_of(head, struct list_lru_memcg, rcu);
        kvfree(mlru);
}

static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = rcu_dereference_protected(nlru->memcg_lrus,
                                        lockdep_is_held(&list_lrus_mutex));
        new = kvmalloc(sizeof(*new) + new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kvfree(new);
                return -ENOMEM;
        }

        memcpy(&new->lru, &old->lru, old_size * sizeof(void *));

        /*
         * The locking below allows readers that hold nlru->lock to avoid
         * taking rcu_read_lock (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);

        call_rcu(&old->rcu, kvfree_rcu);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        struct list_lru_memcg *memcg_lrus;

        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus,
                                               lockdep_is_held(&list_lrus_mutex));
        /*
         * Do not bother shrinking the array back to the old size, because we
         * cannot handle allocation failures here.
         */
        __memcg_destroy_list_lru_node(memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

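/*
 * Called when the memcg cache id space grows: resize the per-memcg list
 * arrays of every registered memcg-aware lru from old_size to new_size
 * entries.  On failure the newly allocated lists of already-updated lrus
 * are freed again and -ENOMEM is returned.
 */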
int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

static void memcg_drain_list_lru_node(struct list_lru *lru, int nid,
                                      int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru_node *nlru = &lru->node[nid];
        int dst_idx = dst_memcg->kmemcg_id;
        struct list_lru_one *src, *dst;
        bool set;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);
        set = (!dst->nr_items && src->nr_items);
        dst->nr_items += src->nr_items;
        if (set)
                memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
        src->nr_items = 0;

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, struct mem_cgroup *dst_memcg)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(lru, i, src_idx, dst_memcg);
}

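/*
 * memcg_drain_all_list_lrus: move (reparent) every item charged to the memcg
 * identified by src_idx onto dst_memcg's lists, for all registered lrus and
 * all nodes, so the items remain reclaimable after the source memcg goes
 * away.
 */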
void memcg_drain_all_list_lrus(int src_idx, struct mem_cgroup *dst_memcg)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_memcg);
        mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

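/*
 * __list_lru_init: allocate and initialize the per-node structures, set up
 * the per-memcg arrays when memcg_aware, and (under CONFIG_MEMCG_KMEM)
 * register the lru so memcg_update_all_list_lrus() and
 * memcg_drain_all_list_lrus() can find it.  An optional lock_class_key sets
 * the lockdep class of the per-node locks; the shrinker, if any, supplies
 * the id used when setting shrinker bits.
 */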
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key, struct shrinker *shrinker)
{
        int i;
        int err = -ENOMEM;

#ifdef CONFIG_MEMCG_KMEM
        if (shrinker)
                lru->shrinker_id = shrinker->id;
        else
                lru->shrinker_id = -1;
#endif
        memcg_get_cache_ids();

        lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                /* Do this so a list_lru_destroy() doesn't crash: */
                lru->node = NULL;
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

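/*
 * list_lru_destroy: tear down everything __list_lru_init() set up.  Safe to
 * call on an lru that was never initialized or is already destroyed.
 */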
void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

#ifdef CONFIG_MEMCG_KMEM
        lru->shrinker_id = -1;
#endif
        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);