Merge branch 'work.iov_iter' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[sfrench/cifs-2.6.git] / mm / page_counter.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Lockless hierarchical page accounting & limiting
4  *
5  * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
6  */
7
8 #include <linux/page_counter.h>
9 #include <linux/atomic.h>
10 #include <linux/kernel.h>
11 #include <linux/string.h>
12 #include <linux/sched.h>
13 #include <linux/bug.h>
14 #include <asm/page.h>
15
16 /**
17  * page_counter_cancel - take pages out of the local counter
18  * @counter: counter
19  * @nr_pages: number of pages to cancel
20  */
21 void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
22 {
23         long new;
24
25         new = atomic_long_sub_return(nr_pages, &counter->count);
26         /* More uncharges than charges? */
27         WARN_ON_ONCE(new < 0);
28 }
29
30 /**
31  * page_counter_charge - hierarchically charge pages
32  * @counter: counter
33  * @nr_pages: number of pages to charge
34  *
35  * NOTE: This does not consider any configured counter limits.
36  */
37 void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
38 {
39         struct page_counter *c;
40
41         for (c = counter; c; c = c->parent) {
42                 long new;
43
44                 new = atomic_long_add_return(nr_pages, &c->count);
45                 /*
46                  * This is indeed racy, but we can live with some
47                  * inaccuracy in the watermark.
48                  */
49                 if (new > c->watermark)
50                         c->watermark = new;
51         }
52 }
53
54 /**
55  * page_counter_try_charge - try to hierarchically charge pages
56  * @counter: counter
57  * @nr_pages: number of pages to charge
58  * @fail: points first counter to hit its limit, if any
59  *
60  * Returns %true on success, or %false and @fail if the counter or one
61  * of its ancestors has hit its configured limit.
62  */
63 bool page_counter_try_charge(struct page_counter *counter,
64                              unsigned long nr_pages,
65                              struct page_counter **fail)
66 {
67         struct page_counter *c;
68
69         for (c = counter; c; c = c->parent) {
70                 long new;
71                 /*
72                  * Charge speculatively to avoid an expensive CAS.  If
73                  * a bigger charge fails, it might falsely lock out a
74                  * racing smaller charge and send it into reclaim
75                  * early, but the error is limited to the difference
76                  * between the two sizes, which is less than 2M/4M in
77                  * case of a THP locking out a regular page charge.
78                  *
79                  * The atomic_long_add_return() implies a full memory
80                  * barrier between incrementing the count and reading
81                  * the limit.  When racing with page_counter_limit(),
82                  * we either see the new limit or the setter sees the
83                  * counter has changed and retries.
84                  */
85                 new = atomic_long_add_return(nr_pages, &c->count);
86                 if (new > c->limit) {
87                         atomic_long_sub(nr_pages, &c->count);
88                         /*
89                          * This is racy, but we can live with some
90                          * inaccuracy in the failcnt.
91                          */
92                         c->failcnt++;
93                         *fail = c;
94                         goto failed;
95                 }
96                 /*
97                  * Just like with failcnt, we can live with some
98                  * inaccuracy in the watermark.
99                  */
100                 if (new > c->watermark)
101                         c->watermark = new;
102         }
103         return true;
104
105 failed:
106         for (c = counter; c != *fail; c = c->parent)
107                 page_counter_cancel(c, nr_pages);
108
109         return false;
110 }
111
112 /**
113  * page_counter_uncharge - hierarchically uncharge pages
114  * @counter: counter
115  * @nr_pages: number of pages to uncharge
116  */
117 void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
118 {
119         struct page_counter *c;
120
121         for (c = counter; c; c = c->parent)
122                 page_counter_cancel(c, nr_pages);
123 }
124
125 /**
126  * page_counter_limit - limit the number of pages allowed
127  * @counter: counter
128  * @limit: limit to set
129  *
130  * Returns 0 on success, -EBUSY if the current number of pages on the
131  * counter already exceeds the specified limit.
132  *
133  * The caller must serialize invocations on the same counter.
134  */
135 int page_counter_limit(struct page_counter *counter, unsigned long limit)
136 {
137         for (;;) {
138                 unsigned long old;
139                 long count;
140
141                 /*
142                  * Update the limit while making sure that it's not
143                  * below the concurrently-changing counter value.
144                  *
145                  * The xchg implies two full memory barriers before
146                  * and after, so the read-swap-read is ordered and
147                  * ensures coherency with page_counter_try_charge():
148                  * that function modifies the count before checking
149                  * the limit, so if it sees the old limit, we see the
150                  * modified counter and retry.
151                  */
152                 count = atomic_long_read(&counter->count);
153
154                 if (count > limit)
155                         return -EBUSY;
156
157                 old = xchg(&counter->limit, limit);
158
159                 if (atomic_long_read(&counter->count) <= count)
160                         return 0;
161
162                 counter->limit = old;
163                 cond_resched();
164         }
165 }
166
167 /**
168  * page_counter_memparse - memparse() for page counter limits
169  * @buf: string to parse
170  * @max: string meaning maximum possible value
171  * @nr_pages: returns the result in number of pages
172  *
173  * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
174  * limited to %PAGE_COUNTER_MAX.
175  */
176 int page_counter_memparse(const char *buf, const char *max,
177                           unsigned long *nr_pages)
178 {
179         char *end;
180         u64 bytes;
181
182         if (!strcmp(buf, max)) {
183                 *nr_pages = PAGE_COUNTER_MAX;
184                 return 0;
185         }
186
187         bytes = memparse(buf, &end);
188         if (*end != '\0')
189                 return -EINVAL;
190
191         *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);
192
193         return 0;
194 }