mm/page_counter.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>
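
/*
 * Track the portion of this counter's usage that is protected by its low
 * setting: all of it when usage is within c->low, none of it otherwise.
 * The change against the previously recorded value is folded into the
 * parent's children_low_usage, which the memory controller consults when
 * distributing a parent's protection among its children.
 */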
static void propagate_low_usage(struct page_counter *c, unsigned long usage)
{
        unsigned long low_usage, old;
        long delta;

        if (!c->parent)
                return;

        if (!c->low && !atomic_long_read(&c->low_usage))
                return;

        if (usage <= c->low)
                low_usage = usage;
        else
                low_usage = 0;

        old = atomic_long_xchg(&c->low_usage, low_usage);
        delta = low_usage - old;
        if (delta)
                atomic_long_add(delta, &c->parent->children_low_usage);
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        propagate_low_usage(counter, new);
        /* More uncharges than charges? */
        WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_low_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
}
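
/*
 * Illustrative sketch, not part of the original file: hierarchical
 * charging in action.  Charging a child counter also raises the usage of
 * every ancestor, so a parent's usage covers its own charges plus those
 * of all its children.  The demo function name is made up for the example.
 */
static void __maybe_unused page_counter_charge_demo(void)
{
        struct page_counter parent = { }, child = { };

        page_counter_init(&parent, NULL);
        page_counter_init(&child, &parent);

        page_counter_charge(&child, 32);        /* child: 32, parent: 32 */
        page_counter_charge(&parent, 8);        /* child: 32, parent: 40 */

        WARN_ON(page_counter_read(&child) != 32);
        WARN_ON(page_counter_read(&parent) != 40);

        page_counter_uncharge(&child, 32);      /* child: 0, parent: 8 */
}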

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS. If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit. When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_low_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt.
                         */
                        c->failcnt++;
                        *fail = c;
                        goto failed;
                }
                propagate_low_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}
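
/*
 * Illustrative sketch, not part of the original file: the typical caller
 * pattern around page_counter_try_charge(), loosely modeled on how the
 * memory controller charges pages.  On failure, @fail identifies the
 * counter (possibly an ancestor) that hit its limit, telling the caller
 * which level of the hierarchy to reclaim from before retrying.  The
 * reclaim callback is a hypothetical stand-in, not a real kernel API.
 */
static bool __maybe_unused
charge_with_retry_demo(struct page_counter *counter, unsigned long nr_pages,
                       unsigned long (*reclaim)(struct page_counter *over))
{
        struct page_counter *fail;
        int retries = 5;

        while (retries--) {
                if (page_counter_try_charge(counter, nr_pages, &fail))
                        return true;
                /* Try to make room at the level that refused the charge. */
                if (!reclaim(fail))
                        break;
        }
        return false;
}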

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = atomic_long_read(&counter->usage);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (atomic_long_read(&counter->usage) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}
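
/*
 * Worked example of the retry above (illustrative, not from the original
 * file): usage is 90 pages and the limit is being lowered to 100.  The
 * setter reads usage (90), swaps in max = 100, then re-reads usage.  If a
 * racing charge of 15 pages slipped in between, the re-read returns 105,
 * which exceeds the snapshot, so the old limit is restored and the loop
 * repeats.  On the next pass the setter either finds the charge backed
 * out (the racer saw max = 100 and failed) and installs the limit, or
 * still finds usage above 100 and returns -EBUSY.
 */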

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->low = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_low_usage(c, atomic_long_read(&c->usage));
}
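
/*
 * Illustrative sketch, not part of the original file: how the protected
 * usage tracked by propagate_low_usage() reacts to page_counter_set_low().
 * Only usage that fits under the low setting counts as protected and shows
 * up in the parent's children_low_usage.  The demo function name is made
 * up for the example.
 */
static void __maybe_unused page_counter_set_low_demo(void)
{
        struct page_counter parent = { }, child = { };

        page_counter_init(&parent, NULL);
        page_counter_init(&child, &parent);

        page_counter_charge(&child, 20);

        page_counter_set_low(&child, 64);       /* usage 20 <= low 64 */
        WARN_ON(atomic_long_read(&parent.children_low_usage) != 20);

        page_counter_set_low(&child, 8);        /* usage 20 > low 8 */
        WARN_ON(atomic_long_read(&parent.children_low_usage) != 0);
}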

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}
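
/*
 * Illustrative sketch, not part of the original file: how a cgroup-style
 * limit file might combine page_counter_memparse() with
 * page_counter_set_max().  The string "max" (or a byte value such as
 * "512M") is converted to a page count and then applied; -EBUSY means the
 * current usage already exceeds the requested limit.  The function name is
 * made up for the example, and the caller is assumed to serialize limit
 * updates on the counter.
 */
static int __maybe_unused set_limit_demo(struct page_counter *counter,
                                         const char *buf)
{
        unsigned long nr_pages;
        int err;

        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;

        return page_counter_set_max(counter, nr_pages);
}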