// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2018 HUAWEI, Inc.
 *             http://www.huawei.com/
 * Created by Gao Xiang <gaoxiang25@huawei.com>
 */
#include "internal.h"
#include <linux/pagevec.h>

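/*
 * Grab one page for the caller: reuse a page queued on the supplied
 * @pool if one is available, otherwise fall back to a fresh order-0
 * allocation (with __GFP_NOFAIL applied when @nofail is set).
 */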
struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
{
        struct page *page;

        if (!list_empty(pool)) {
                page = lru_to_page(pool);
                DBG_BUGON(page_ref_count(page) != 1);
                list_del(&page->lru);
        } else {
                page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
        }
        return page;
}

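/*
 * One statically-allocated scratch buffer per possible CPU, aligned to
 * a cacheline boundary to avoid false sharing. erofs_get_pcpubuf()
 * disables preemption so the caller stays pinned to its CPU and the
 * returned buffer stays exclusive; the buffer is only valid until the
 * caller re-enables preemption.
 */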
#if (EROFS_PCPUBUF_NR_PAGES > 0)
static struct {
        u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];

void *erofs_get_pcpubuf(unsigned int pagenr)
{
        preempt_disable();
        return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
}
#endif

#ifdef CONFIG_EROFS_FS_ZIP
/* global shrink count (for all mounted EROFS instances) */
static atomic_long_t erofs_global_shrink_cnt;

#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)

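/*
 * Lock-free reference grab: wait until the workgroup is not frozen,
 * then bump its refcount with cmpxchg() so that a concurrent freeze
 * cannot slip in between. Taking the first extra reference (o == 1)
 * also removes the workgroup from the global shrink count since it is
 * no longer idle. Returns -1 if the workgroup is going away.
 */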
static int erofs_workgroup_get(struct erofs_workgroup *grp)
{
        int o;

repeat:
        o = erofs_wait_on_workgroup_freezed(grp);
        if (o <= 0)
                return -1;

        if (atomic_cmpxchg(&grp->refcount, o, o + 1) != o)
                goto repeat;

        /* decrease the shrink count; pairs with the inc in erofs_workgroup_put */
        if (o == 1)
                atomic_long_dec(&erofs_global_shrink_cnt);
        return 0;
}

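/*
 * RCU-protected lookup in the per-superblock workstation tree. If the
 * found workgroup is currently frozen (being reclaimed), drop the RCU
 * read lock and retry until either a stable reference is taken or the
 * entry has disappeared.
 */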
struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
                                             pgoff_t index, bool *tag)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_workgroup *grp;

repeat:
        rcu_read_lock();
        grp = radix_tree_lookup(&sbi->workstn_tree, index);
        if (grp) {
                *tag = xa_pointer_tag(grp);
                grp = xa_untag_pointer(grp);

                if (erofs_workgroup_get(grp)) {
                        /* prefer to relax the RCU read side before retrying */
                        rcu_read_unlock();
                        goto repeat;
                }

                DBG_BUGON(index != grp->index);
        }
        rcu_read_unlock();
        return grp;
}

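/*
 * Publish a freshly initialized workgroup (its refcount must still be
 * exactly 1) in the workstation tree. An extra reference is taken
 * before insertion so the workgroup cannot be observed and freed by
 * others while it is being made visible.
 */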
int erofs_register_workgroup(struct super_block *sb,
                             struct erofs_workgroup *grp,
                             bool tag)
{
        struct erofs_sb_info *sbi;
        int err;

        /* grp shouldn't be broken or used before */
        if (atomic_read(&grp->refcount) != 1) {
                DBG_BUGON(1);
                return -EINVAL;
        }

        err = radix_tree_preload(GFP_NOFS);
        if (err)
                return err;

        sbi = EROFS_SB(sb);
        xa_lock(&sbi->workstn_tree);

        grp = xa_tag_pointer(grp, tag);

        /*
         * Bump up the reference count before making this workgroup
         * visible to other users in order to avoid potential UAF
         * when the access is not serialized by workstn_lock.
         */
        __erofs_workgroup_get(grp);

        err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
        if (err)
                /*
                 * it's safe to decrease here since the workgroup isn't
                 * visible yet and its refcount is >= 2 (cannot be frozen).
                 */
                __erofs_workgroup_put(grp);

        xa_unlock(&sbi->workstn_tree);
        radix_tree_preload_end();
        return err;
}

static void __erofs_workgroup_free(struct erofs_workgroup *grp)
{
        atomic_long_dec(&erofs_global_shrink_cnt);
        erofs_workgroup_free_rcu(grp);
}

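/*
 * Drop one reference. When only the workstation tree reference
 * remains (count == 1), the workgroup becomes reclaimable and is
 * accounted in the global shrink count; once the last reference is
 * gone, it is freed through the RCU path above.
 */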
int erofs_workgroup_put(struct erofs_workgroup *grp)
{
        int count = atomic_dec_return(&grp->refcount);

        if (count == 1)
                atomic_long_inc(&erofs_global_shrink_cnt);
        else if (!count)
                __erofs_workgroup_free(grp);
        return count;
}

static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
{
        erofs_workgroup_unfreeze(grp, 0);
        __erofs_workgroup_free(grp);
}

static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
                                           struct erofs_workgroup *grp,
                                           bool cleanup)
{
        /*
         * If managed cache is on, the refcount of workgroups
         * themselves could be < 0 (frozen). In other words,
         * there is no guarantee that all refcounts are > 0.
         */
        if (!erofs_workgroup_try_to_freeze(grp, 1))
                return false;

        /*
         * Note that all cached pages should be unattached
         * before being deleted from the radix tree. Otherwise some
         * cached pages could still be attached to the orphan
         * old workgroup when the new one is available in the tree.
         */
        if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
                erofs_workgroup_unfreeze(grp, 1);
                return false;
        }

        /*
         * It's impossible to fail after the workgroup is frozen,
         * however in order to avoid some race conditions, add a
         * DBG_BUGON to observe this in advance.
         */
        DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
                                                     grp->index)) != grp);

        /*
         * If managed cache is on, the last refcount should indicate
         * the related workstation.
         */
        erofs_workgroup_unfreeze_final(grp);
        return true;
}

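/*
 * Walk the workstation tree in PAGEVEC_SIZE-sized batches via gang
 * lookup and try to release up to @nr_shrink workgroups, resuming
 * after the last visited index on each pass. Returns the number of
 * workgroups actually freed.
 */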
static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
                                              unsigned long nr_shrink,
                                              bool cleanup)
{
        pgoff_t first_index = 0;
        void *batch[PAGEVEC_SIZE];
        unsigned int freed = 0;
        int i, found;

repeat:
        xa_lock(&sbi->workstn_tree);

        found = radix_tree_gang_lookup(&sbi->workstn_tree,
                                       batch, first_index, PAGEVEC_SIZE);

        for (i = 0; i < found; ++i) {
                struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);

                first_index = grp->index + 1;

                /* try to shrink each valid workgroup */
                if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
                        continue;

                ++freed;
                if (!--nr_shrink)
                        break;
        }
        xa_unlock(&sbi->workstn_tree);

        if (i && nr_shrink)
                goto repeat;
        return freed;
}

/* protected by 'erofs_sb_list_lock' */
static unsigned int shrinker_run_no;

/* protects the mounted 'erofs_sb_list' */
static DEFINE_SPINLOCK(erofs_sb_list_lock);
static LIST_HEAD(erofs_sb_list);

void erofs_shrinker_register(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_init(&sbi->umount_mutex);

        spin_lock(&erofs_sb_list_lock);
        list_add(&sbi->list, &erofs_sb_list);
        spin_unlock(&erofs_sb_list_lock);
}

void erofs_shrinker_unregister(struct super_block *sb)
{
        struct erofs_sb_info *const sbi = EROFS_SB(sb);

        mutex_lock(&sbi->umount_mutex);
        erofs_shrink_workstation(sbi, ~0UL, true);

        spin_lock(&erofs_sb_list_lock);
        list_del(&sbi->list);
        spin_unlock(&erofs_sb_list_lock);
        mutex_unlock(&sbi->umount_mutex);
}

static unsigned long erofs_shrink_count(struct shrinker *shrink,
                                        struct shrink_control *sc)
{
        return atomic_long_read(&erofs_global_shrink_cnt);
}

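/*
 * Shrinker callback: walk the mounted erofs instances and reclaim idle
 * workgroups until sc->nr_to_scan objects have been freed or every
 * superblock has been visited once in this run (tracked through
 * shrinker_run_no). Superblocks in the middle of unmounting are
 * skipped via mutex_trylock() on umount_mutex.
 */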
static unsigned long erofs_shrink_scan(struct shrinker *shrink,
                                       struct shrink_control *sc)
{
        struct erofs_sb_info *sbi;
        struct list_head *p;
        unsigned long nr = sc->nr_to_scan;
        unsigned int run_no;
        unsigned long freed = 0;

        spin_lock(&erofs_sb_list_lock);
        do {
                run_no = ++shrinker_run_no;
        } while (run_no == 0);

        /* Iterate over all mounted superblocks and try to shrink them */
        p = erofs_sb_list.next;
        while (p != &erofs_sb_list) {
                sbi = list_entry(p, struct erofs_sb_info, list);

                /*
                 * We move the ones we do to the end of the list, so we stop
                 * when we see one we have already done.
                 */
                if (sbi->shrinker_run_no == run_no)
                        break;

                if (!mutex_trylock(&sbi->umount_mutex)) {
                        p = p->next;
                        continue;
                }

                spin_unlock(&erofs_sb_list_lock);
                sbi->shrinker_run_no = run_no;

                freed += erofs_shrink_workstation(sbi, nr, false);

                spin_lock(&erofs_sb_list_lock);
                /* Get the next list element before we move this one */
                p = p->next;

                /*
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
                list_move_tail(&sbi->list, &erofs_sb_list);
                mutex_unlock(&sbi->umount_mutex);

                if (freed >= nr)
                        break;
        }
        spin_unlock(&erofs_sb_list_lock);
        return freed;
}

static struct shrinker erofs_shrinker_info = {
        .scan_objects = erofs_shrink_scan,
        .count_objects = erofs_shrink_count,
        .seeks = DEFAULT_SEEKS,
};

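/*
 * Module-lifetime hooks: register_shrinker() hooks the callbacks above
 * into memory reclaim. These are presumably called once from the erofs
 * module init/exit paths; the exact call sites live outside this file.
 */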
int __init erofs_init_shrinker(void)
{
        return register_shrinker(&erofs_shrinker_info);
}

void erofs_exit_shrinker(void)
{
        unregister_shrinker(&erofs_shrinker_info);
}
#endif	/* !CONFIG_EROFS_FS_ZIP */