Commit | Line | Data |
---|---|---|
65294c1f JL |
1 | /* |
2 | * Open file cache. | |
3 | * | |
4 | * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com> | |
5 | */ | |
6 | ||
7 | #include <linux/hash.h> | |
8 | #include <linux/slab.h> | |
65294c1f JL |
9 | #include <linux/file.h> |
10 | #include <linux/sched.h> | |
11 | #include <linux/list_lru.h> | |
12 | #include <linux/fsnotify_backend.h> | |
13 | #include <linux/fsnotify.h> | |
14 | #include <linux/seq_file.h> | |
15 | ||
16 | #include "vfs.h" | |
17 | #include "nfsd.h" | |
18 | #include "nfsfh.h" | |
5e113224 | 19 | #include "netns.h" |
65294c1f JL |
20 | #include "filecache.h" |
21 | #include "trace.h" | |
22 | ||
#define NFSDDBG_FACILITY		NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS		12
#define NFSD_FILE_HASH_SIZE		(1 << NFSD_FILE_HASH_BITS)

/* Delay used when queueing the laundrette while under the LRU threshold */
#define NFSD_LAUNDRETTE_DELAY		(2 * HZ)

/* Bit numbers used with nfsd_file_lru_flags */
#define NFSD_FILE_LRU_RESCAN		(0)
#define NFSD_FILE_SHUTDOWN		(1)

/*
 * Above the threshold the laundrette is kicked immediately; at or above
 * the limit, callers that may flush also wait for it to complete.
 */
#define NFSD_FILE_LRU_THRESHOLD		(4096UL)
#define NFSD_FILE_LRU_LIMIT		(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK		(NFSD_MAY_READ|NFSD_MAY_WRITE)
37 | ||
38 | struct nfsd_fcache_bucket { | |
39 | struct hlist_head nfb_head; | |
40 | spinlock_t nfb_lock; | |
41 | unsigned int nfb_count; | |
42 | unsigned int nfb_maxcount; | |
43 | }; | |
44 | ||
45 | static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); | |
46 | ||
47 | static struct kmem_cache *nfsd_file_slab; | |
48 | static struct kmem_cache *nfsd_file_mark_slab; | |
49 | static struct nfsd_fcache_bucket *nfsd_file_hashtbl; | |
50 | static struct list_lru nfsd_file_lru; | |
51 | static long nfsd_file_lru_flags; | |
52 | static struct fsnotify_group *nfsd_file_fsnotify_group; | |
53 | static atomic_long_t nfsd_filecache_count; | |
54 | static struct delayed_work nfsd_filecache_laundrette; | |
55 | ||
/*
 * Tells nfsd_file_schedule_laundrette() whether it may synchronously
 * flush the laundrette work once the cache has reached its limit.
 */
enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,	/* only queue the work */
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH		/* may also wait for it */
};
60 | ||
61 | static void | |
62 | nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl) | |
63 | { | |
64 | long count = atomic_long_read(&nfsd_filecache_count); | |
65 | ||
66 | if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags)) | |
67 | return; | |
68 | ||
69 | /* Be more aggressive about scanning if over the threshold */ | |
70 | if (count > NFSD_FILE_LRU_THRESHOLD) | |
71 | mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0); | |
72 | else | |
73 | schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY); | |
74 | ||
75 | if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH) | |
76 | return; | |
77 | ||
78 | /* ...and don't delay flushing if we're out of control */ | |
79 | if (count >= NFSD_FILE_LRU_LIMIT) | |
80 | flush_delayed_work(&nfsd_filecache_laundrette); | |
81 | } | |
82 | ||
83 | static void | |
84 | nfsd_file_slab_free(struct rcu_head *rcu) | |
85 | { | |
86 | struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu); | |
87 | ||
88 | put_cred(nf->nf_cred); | |
89 | kmem_cache_free(nfsd_file_slab, nf); | |
90 | } | |
91 | ||
92 | static void | |
93 | nfsd_file_mark_free(struct fsnotify_mark *mark) | |
94 | { | |
95 | struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark, | |
96 | nfm_mark); | |
97 | ||
98 | kmem_cache_free(nfsd_file_mark_slab, nfm); | |
99 | } | |
100 | ||
101 | static struct nfsd_file_mark * | |
102 | nfsd_file_mark_get(struct nfsd_file_mark *nfm) | |
103 | { | |
104 | if (!atomic_inc_not_zero(&nfm->nfm_ref)) | |
105 | return NULL; | |
106 | return nfm; | |
107 | } | |
108 | ||
109 | static void | |
110 | nfsd_file_mark_put(struct nfsd_file_mark *nfm) | |
111 | { | |
112 | if (atomic_dec_and_test(&nfm->nfm_ref)) { | |
113 | ||
114 | fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group); | |
115 | fsnotify_put_mark(&nfm->nfm_mark); | |
116 | } | |
117 | } | |
118 | ||
119 | static struct nfsd_file_mark * | |
120 | nfsd_file_mark_find_or_create(struct nfsd_file *nf) | |
121 | { | |
122 | int err; | |
123 | struct fsnotify_mark *mark; | |
124 | struct nfsd_file_mark *nfm = NULL, *new; | |
125 | struct inode *inode = nf->nf_inode; | |
126 | ||
127 | do { | |
128 | mutex_lock(&nfsd_file_fsnotify_group->mark_mutex); | |
129 | mark = fsnotify_find_mark(&inode->i_fsnotify_marks, | |
130 | nfsd_file_fsnotify_group); | |
131 | if (mark) { | |
132 | nfm = nfsd_file_mark_get(container_of(mark, | |
133 | struct nfsd_file_mark, | |
134 | nfm_mark)); | |
135 | mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); | |
136 | fsnotify_put_mark(mark); | |
137 | if (likely(nfm)) | |
138 | break; | |
139 | } else | |
140 | mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex); | |
141 | ||
142 | /* allocate a new nfm */ | |
143 | new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL); | |
144 | if (!new) | |
145 | return NULL; | |
146 | fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group); | |
147 | new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF; | |
148 | atomic_set(&new->nfm_ref, 1); | |
149 | ||
150 | err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0); | |
151 | ||
152 | /* | |
153 | * If the add was successful, then return the object. | |
154 | * Otherwise, we need to put the reference we hold on the | |
155 | * nfm_mark. The fsnotify code will take a reference and put | |
156 | * it on failure, so we can't just free it directly. It's also | |
157 | * not safe to call fsnotify_destroy_mark on it as the | |
158 | * mark->group will be NULL. Thus, we can't let the nfm_ref | |
159 | * counter drive the destruction at this point. | |
160 | */ | |
161 | if (likely(!err)) | |
162 | nfm = new; | |
163 | else | |
164 | fsnotify_put_mark(&new->nfm_mark); | |
165 | } while (unlikely(err == -EEXIST)); | |
166 | ||
167 | return nfm; | |
168 | } | |
169 | ||
170 | static struct nfsd_file * | |
5e113224 TM |
171 | nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, |
172 | struct net *net) | |
65294c1f JL |
173 | { |
174 | struct nfsd_file *nf; | |
175 | ||
176 | nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL); | |
177 | if (nf) { | |
178 | INIT_HLIST_NODE(&nf->nf_node); | |
179 | INIT_LIST_HEAD(&nf->nf_lru); | |
180 | nf->nf_file = NULL; | |
181 | nf->nf_cred = get_current_cred(); | |
5e113224 | 182 | nf->nf_net = net; |
65294c1f JL |
183 | nf->nf_flags = 0; |
184 | nf->nf_inode = inode; | |
185 | nf->nf_hashval = hashval; | |
186 | atomic_set(&nf->nf_ref, 1); | |
187 | nf->nf_may = may & NFSD_FILE_MAY_MASK; | |
188 | if (may & NFSD_MAY_NOT_BREAK_LEASE) { | |
189 | if (may & NFSD_MAY_WRITE) | |
190 | __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); | |
191 | if (may & NFSD_MAY_READ) | |
192 | __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); | |
193 | } | |
194 | nf->nf_mark = NULL; | |
195 | trace_nfsd_file_alloc(nf); | |
196 | } | |
197 | return nf; | |
198 | } | |
199 | ||
200 | static bool | |
201 | nfsd_file_free(struct nfsd_file *nf) | |
202 | { | |
203 | bool flush = false; | |
204 | ||
205 | trace_nfsd_file_put_final(nf); | |
206 | if (nf->nf_mark) | |
207 | nfsd_file_mark_put(nf->nf_mark); | |
208 | if (nf->nf_file) { | |
209 | get_file(nf->nf_file); | |
210 | filp_close(nf->nf_file, NULL); | |
211 | fput(nf->nf_file); | |
212 | flush = true; | |
213 | } | |
214 | call_rcu(&nf->nf_rcu, nfsd_file_slab_free); | |
215 | return flush; | |
216 | } | |
217 | ||
055b24a8 TM |
218 | static bool |
219 | nfsd_file_check_writeback(struct nfsd_file *nf) | |
220 | { | |
221 | struct file *file = nf->nf_file; | |
222 | struct address_space *mapping; | |
223 | ||
224 | if (!file || !(file->f_mode & FMODE_WRITE)) | |
225 | return false; | |
226 | mapping = file->f_mapping; | |
227 | return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) || | |
228 | mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); | |
229 | } | |
230 | ||
231 | static int | |
232 | nfsd_file_check_write_error(struct nfsd_file *nf) | |
233 | { | |
234 | struct file *file = nf->nf_file; | |
235 | ||
236 | if (!file || !(file->f_mode & FMODE_WRITE)) | |
237 | return 0; | |
238 | return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); | |
239 | } | |
240 | ||
241 | static bool | |
242 | nfsd_file_in_use(struct nfsd_file *nf) | |
243 | { | |
244 | return nfsd_file_check_writeback(nf) || | |
245 | nfsd_file_check_write_error(nf); | |
246 | } | |
247 | ||
65294c1f JL |
248 | static void |
249 | nfsd_file_do_unhash(struct nfsd_file *nf) | |
250 | { | |
251 | lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); | |
252 | ||
253 | trace_nfsd_file_unhash(nf); | |
254 | ||
055b24a8 TM |
255 | if (nfsd_file_check_write_error(nf)) |
256 | nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id)); | |
65294c1f JL |
257 | --nfsd_file_hashtbl[nf->nf_hashval].nfb_count; |
258 | hlist_del_rcu(&nf->nf_node); | |
259 | if (!list_empty(&nf->nf_lru)) | |
260 | list_lru_del(&nfsd_file_lru, &nf->nf_lru); | |
261 | atomic_long_dec(&nfsd_filecache_count); | |
262 | } | |
263 | ||
264 | static bool | |
265 | nfsd_file_unhash(struct nfsd_file *nf) | |
266 | { | |
267 | if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { | |
268 | nfsd_file_do_unhash(nf); | |
269 | return true; | |
270 | } | |
271 | return false; | |
272 | } | |
273 | ||
274 | /* | |
275 | * Return true if the file was unhashed. | |
276 | */ | |
277 | static bool | |
278 | nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose) | |
279 | { | |
280 | lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); | |
281 | ||
282 | trace_nfsd_file_unhash_and_release_locked(nf); | |
283 | if (!nfsd_file_unhash(nf)) | |
284 | return false; | |
285 | /* keep final reference for nfsd_file_lru_dispose */ | |
286 | if (atomic_add_unless(&nf->nf_ref, -1, 1)) | |
287 | return true; | |
288 | ||
289 | list_add(&nf->nf_lru, dispose); | |
290 | return true; | |
291 | } | |
292 | ||
293 | static int | |
294 | nfsd_file_put_noref(struct nfsd_file *nf) | |
295 | { | |
296 | int count; | |
297 | trace_nfsd_file_put(nf); | |
298 | ||
299 | count = atomic_dec_return(&nf->nf_ref); | |
300 | if (!count) { | |
301 | WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); | |
302 | nfsd_file_free(nf); | |
303 | } | |
304 | return count; | |
305 | } | |
306 | ||
307 | void | |
308 | nfsd_file_put(struct nfsd_file *nf) | |
309 | { | |
310 | bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0; | |
055b24a8 | 311 | bool unused = !nfsd_file_in_use(nf); |
65294c1f JL |
312 | |
313 | set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); | |
055b24a8 | 314 | if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused) |
65294c1f JL |
315 | nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH); |
316 | } | |
317 | ||
318 | struct nfsd_file * | |
319 | nfsd_file_get(struct nfsd_file *nf) | |
320 | { | |
321 | if (likely(atomic_inc_not_zero(&nf->nf_ref))) | |
322 | return nf; | |
323 | return NULL; | |
324 | } | |
325 | ||
326 | static void | |
327 | nfsd_file_dispose_list(struct list_head *dispose) | |
328 | { | |
329 | struct nfsd_file *nf; | |
330 | ||
331 | while(!list_empty(dispose)) { | |
332 | nf = list_first_entry(dispose, struct nfsd_file, nf_lru); | |
333 | list_del(&nf->nf_lru); | |
334 | nfsd_file_put_noref(nf); | |
335 | } | |
336 | } | |
337 | ||
338 | static void | |
339 | nfsd_file_dispose_list_sync(struct list_head *dispose) | |
340 | { | |
341 | bool flush = false; | |
342 | struct nfsd_file *nf; | |
343 | ||
344 | while(!list_empty(dispose)) { | |
345 | nf = list_first_entry(dispose, struct nfsd_file, nf_lru); | |
346 | list_del(&nf->nf_lru); | |
347 | if (!atomic_dec_and_test(&nf->nf_ref)) | |
348 | continue; | |
349 | if (nfsd_file_free(nf)) | |
350 | flush = true; | |
351 | } | |
352 | if (flush) | |
353 | flush_delayed_fput(); | |
354 | } | |
355 | ||
356 | /* | |
357 | * Note this can deadlock with nfsd_file_cache_purge. | |
358 | */ | |
359 | static enum lru_status | |
360 | nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, | |
361 | spinlock_t *lock, void *arg) | |
362 | __releases(lock) | |
363 | __acquires(lock) | |
364 | { | |
365 | struct list_head *head = arg; | |
366 | struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); | |
367 | ||
368 | /* | |
369 | * Do a lockless refcount check. The hashtable holds one reference, so | |
370 | * we look to see if anything else has a reference, or if any have | |
371 | * been put since the shrinker last ran. Those don't get unhashed and | |
372 | * released. | |
373 | * | |
374 | * Note that in the put path, we set the flag and then decrement the | |
375 | * counter. Here we check the counter and then test and clear the flag. | |
376 | * That order is deliberate to ensure that we can do this locklessly. | |
377 | */ | |
378 | if (atomic_read(&nf->nf_ref) > 1) | |
379 | goto out_skip; | |
055b24a8 TM |
380 | |
381 | /* | |
382 | * Don't throw out files that are still undergoing I/O or | |
383 | * that have uncleared errors pending. | |
384 | */ | |
385 | if (nfsd_file_check_writeback(nf)) | |
386 | goto out_skip; | |
387 | ||
65294c1f JL |
388 | if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) |
389 | goto out_rescan; | |
390 | ||
391 | if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) | |
392 | goto out_skip; | |
393 | ||
394 | list_lru_isolate_move(lru, &nf->nf_lru, head); | |
395 | return LRU_REMOVED; | |
396 | out_rescan: | |
397 | set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags); | |
398 | out_skip: | |
399 | return LRU_SKIP; | |
400 | } | |
401 | ||
402 | static void | |
403 | nfsd_file_lru_dispose(struct list_head *head) | |
404 | { | |
405 | while(!list_empty(head)) { | |
406 | struct nfsd_file *nf = list_first_entry(head, | |
407 | struct nfsd_file, nf_lru); | |
408 | list_del_init(&nf->nf_lru); | |
409 | spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); | |
410 | nfsd_file_do_unhash(nf); | |
411 | spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock); | |
412 | nfsd_file_put_noref(nf); | |
413 | } | |
414 | } | |
415 | ||
416 | static unsigned long | |
417 | nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc) | |
418 | { | |
419 | return list_lru_count(&nfsd_file_lru); | |
420 | } | |
421 | ||
422 | static unsigned long | |
423 | nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) | |
424 | { | |
425 | LIST_HEAD(head); | |
426 | unsigned long ret; | |
427 | ||
428 | ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head); | |
429 | nfsd_file_lru_dispose(&head); | |
430 | return ret; | |
431 | } | |
432 | ||
433 | static struct shrinker nfsd_file_shrinker = { | |
434 | .scan_objects = nfsd_file_lru_scan, | |
435 | .count_objects = nfsd_file_lru_count, | |
436 | .seeks = 1, | |
437 | }; | |
438 | ||
439 | static void | |
440 | __nfsd_file_close_inode(struct inode *inode, unsigned int hashval, | |
441 | struct list_head *dispose) | |
442 | { | |
443 | struct nfsd_file *nf; | |
444 | struct hlist_node *tmp; | |
445 | ||
446 | spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
447 | hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) { | |
448 | if (inode == nf->nf_inode) | |
449 | nfsd_file_unhash_and_release_locked(nf, dispose); | |
450 | } | |
451 | spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
452 | } | |
453 | ||
454 | /** | |
455 | * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file | |
456 | * @inode: inode of the file to attempt to remove | |
457 | * | |
458 | * Walk the whole hash bucket, looking for any files that correspond to "inode". | |
459 | * If any do, then unhash them and put the hashtable reference to them and | |
460 | * destroy any that had their last reference put. Also ensure that any of the | |
461 | * fputs also have their final __fput done as well. | |
462 | */ | |
463 | void | |
464 | nfsd_file_close_inode_sync(struct inode *inode) | |
465 | { | |
466 | unsigned int hashval = (unsigned int)hash_long(inode->i_ino, | |
467 | NFSD_FILE_HASH_BITS); | |
468 | LIST_HEAD(dispose); | |
469 | ||
470 | __nfsd_file_close_inode(inode, hashval, &dispose); | |
471 | trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose)); | |
472 | nfsd_file_dispose_list_sync(&dispose); | |
473 | } | |
474 | ||
475 | /** | |
476 | * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file | |
477 | * @inode: inode of the file to attempt to remove | |
478 | * | |
479 | * Walk the whole hash bucket, looking for any files that correspond to "inode". | |
480 | * If any do, then unhash them and put the hashtable reference to them and | |
481 | * destroy any that had their last reference put. | |
482 | */ | |
483 | static void | |
484 | nfsd_file_close_inode(struct inode *inode) | |
485 | { | |
486 | unsigned int hashval = (unsigned int)hash_long(inode->i_ino, | |
487 | NFSD_FILE_HASH_BITS); | |
488 | LIST_HEAD(dispose); | |
489 | ||
490 | __nfsd_file_close_inode(inode, hashval, &dispose); | |
491 | trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose)); | |
492 | nfsd_file_dispose_list(&dispose); | |
493 | } | |
494 | ||
495 | /** | |
496 | * nfsd_file_delayed_close - close unused nfsd_files | |
497 | * @work: dummy | |
498 | * | |
499 | * Walk the LRU list and close any entries that have not been used since | |
500 | * the last scan. | |
501 | * | |
502 | * Note this can deadlock with nfsd_file_cache_purge. | |
503 | */ | |
504 | static void | |
505 | nfsd_file_delayed_close(struct work_struct *work) | |
506 | { | |
507 | LIST_HEAD(head); | |
508 | ||
509 | list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX); | |
510 | ||
511 | if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags)) | |
512 | nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH); | |
513 | ||
514 | if (!list_empty(&head)) { | |
515 | nfsd_file_lru_dispose(&head); | |
516 | flush_delayed_fput(); | |
517 | } | |
518 | } | |
519 | ||
520 | static int | |
521 | nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, | |
522 | void *data) | |
523 | { | |
524 | struct file_lock *fl = data; | |
525 | ||
526 | /* Only close files for F_SETLEASE leases */ | |
527 | if (fl->fl_flags & FL_LEASE) | |
528 | nfsd_file_close_inode_sync(file_inode(fl->fl_file)); | |
529 | return 0; | |
530 | } | |
531 | ||
532 | static struct notifier_block nfsd_file_lease_notifier = { | |
533 | .notifier_call = nfsd_file_lease_notifier_call, | |
534 | }; | |
535 | ||
536 | static int | |
537 | nfsd_file_fsnotify_handle_event(struct fsnotify_group *group, | |
538 | struct inode *inode, | |
539 | u32 mask, const void *data, int data_type, | |
540 | const struct qstr *file_name, u32 cookie, | |
541 | struct fsnotify_iter_info *iter_info) | |
542 | { | |
543 | trace_nfsd_file_fsnotify_handle_event(inode, mask); | |
544 | ||
545 | /* Should be no marks on non-regular files */ | |
546 | if (!S_ISREG(inode->i_mode)) { | |
547 | WARN_ON_ONCE(1); | |
548 | return 0; | |
549 | } | |
550 | ||
551 | /* don't close files if this was not the last link */ | |
552 | if (mask & FS_ATTRIB) { | |
553 | if (inode->i_nlink) | |
554 | return 0; | |
555 | } | |
556 | ||
557 | nfsd_file_close_inode(inode); | |
558 | return 0; | |
559 | } | |
560 | ||
561 | ||
562 | static const struct fsnotify_ops nfsd_file_fsnotify_ops = { | |
563 | .handle_event = nfsd_file_fsnotify_handle_event, | |
564 | .free_mark = nfsd_file_mark_free, | |
565 | }; | |
566 | ||
567 | int | |
568 | nfsd_file_cache_init(void) | |
569 | { | |
570 | int ret = -ENOMEM; | |
571 | unsigned int i; | |
572 | ||
573 | clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); | |
574 | ||
575 | if (nfsd_file_hashtbl) | |
576 | return 0; | |
577 | ||
578 | nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE, | |
579 | sizeof(*nfsd_file_hashtbl), GFP_KERNEL); | |
580 | if (!nfsd_file_hashtbl) { | |
581 | pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n"); | |
582 | goto out_err; | |
583 | } | |
584 | ||
585 | nfsd_file_slab = kmem_cache_create("nfsd_file", | |
586 | sizeof(struct nfsd_file), 0, 0, NULL); | |
587 | if (!nfsd_file_slab) { | |
588 | pr_err("nfsd: unable to create nfsd_file_slab\n"); | |
589 | goto out_err; | |
590 | } | |
591 | ||
592 | nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark", | |
593 | sizeof(struct nfsd_file_mark), 0, 0, NULL); | |
594 | if (!nfsd_file_mark_slab) { | |
595 | pr_err("nfsd: unable to create nfsd_file_mark_slab\n"); | |
596 | goto out_err; | |
597 | } | |
598 | ||
599 | ||
600 | ret = list_lru_init(&nfsd_file_lru); | |
601 | if (ret) { | |
602 | pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret); | |
603 | goto out_err; | |
604 | } | |
605 | ||
606 | ret = register_shrinker(&nfsd_file_shrinker); | |
607 | if (ret) { | |
608 | pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret); | |
609 | goto out_lru; | |
610 | } | |
611 | ||
612 | ret = lease_register_notifier(&nfsd_file_lease_notifier); | |
613 | if (ret) { | |
614 | pr_err("nfsd: unable to register lease notifier: %d\n", ret); | |
615 | goto out_shrinker; | |
616 | } | |
617 | ||
618 | nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops); | |
619 | if (IS_ERR(nfsd_file_fsnotify_group)) { | |
620 | pr_err("nfsd: unable to create fsnotify group: %ld\n", | |
621 | PTR_ERR(nfsd_file_fsnotify_group)); | |
622 | nfsd_file_fsnotify_group = NULL; | |
623 | goto out_notifier; | |
624 | } | |
625 | ||
626 | for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { | |
627 | INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head); | |
628 | spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock); | |
629 | } | |
630 | ||
631 | INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close); | |
632 | out: | |
633 | return ret; | |
634 | out_notifier: | |
635 | lease_unregister_notifier(&nfsd_file_lease_notifier); | |
636 | out_shrinker: | |
637 | unregister_shrinker(&nfsd_file_shrinker); | |
638 | out_lru: | |
639 | list_lru_destroy(&nfsd_file_lru); | |
640 | out_err: | |
641 | kmem_cache_destroy(nfsd_file_slab); | |
642 | nfsd_file_slab = NULL; | |
643 | kmem_cache_destroy(nfsd_file_mark_slab); | |
644 | nfsd_file_mark_slab = NULL; | |
645 | kfree(nfsd_file_hashtbl); | |
646 | nfsd_file_hashtbl = NULL; | |
647 | goto out; | |
648 | } | |
649 | ||
650 | /* | |
651 | * Note this can deadlock with nfsd_file_lru_cb. | |
652 | */ | |
653 | void | |
5e113224 | 654 | nfsd_file_cache_purge(struct net *net) |
65294c1f JL |
655 | { |
656 | unsigned int i; | |
657 | struct nfsd_file *nf; | |
5e113224 | 658 | struct hlist_node *next; |
65294c1f JL |
659 | LIST_HEAD(dispose); |
660 | bool del; | |
661 | ||
662 | if (!nfsd_file_hashtbl) | |
663 | return; | |
664 | ||
665 | for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { | |
5e113224 TM |
666 | struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i]; |
667 | ||
668 | spin_lock(&nfb->nfb_lock); | |
669 | hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) { | |
670 | if (net && nf->nf_net != net) | |
671 | continue; | |
65294c1f JL |
672 | del = nfsd_file_unhash_and_release_locked(nf, &dispose); |
673 | ||
674 | /* | |
675 | * Deadlock detected! Something marked this entry as | |
676 | * unhased, but hasn't removed it from the hash list. | |
677 | */ | |
678 | WARN_ON_ONCE(!del); | |
679 | } | |
5e113224 | 680 | spin_unlock(&nfb->nfb_lock); |
65294c1f JL |
681 | nfsd_file_dispose_list(&dispose); |
682 | } | |
683 | } | |
684 | ||
685 | void | |
686 | nfsd_file_cache_shutdown(void) | |
687 | { | |
688 | LIST_HEAD(dispose); | |
689 | ||
690 | set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags); | |
691 | ||
692 | lease_unregister_notifier(&nfsd_file_lease_notifier); | |
693 | unregister_shrinker(&nfsd_file_shrinker); | |
694 | /* | |
695 | * make sure all callers of nfsd_file_lru_cb are done before | |
696 | * calling nfsd_file_cache_purge | |
697 | */ | |
698 | cancel_delayed_work_sync(&nfsd_filecache_laundrette); | |
5e113224 | 699 | nfsd_file_cache_purge(NULL); |
65294c1f JL |
700 | list_lru_destroy(&nfsd_file_lru); |
701 | rcu_barrier(); | |
702 | fsnotify_put_group(nfsd_file_fsnotify_group); | |
703 | nfsd_file_fsnotify_group = NULL; | |
704 | kmem_cache_destroy(nfsd_file_slab); | |
705 | nfsd_file_slab = NULL; | |
706 | fsnotify_wait_marks_destroyed(); | |
707 | kmem_cache_destroy(nfsd_file_mark_slab); | |
708 | nfsd_file_mark_slab = NULL; | |
709 | kfree(nfsd_file_hashtbl); | |
710 | nfsd_file_hashtbl = NULL; | |
711 | } | |
712 | ||
713 | static bool | |
714 | nfsd_match_cred(const struct cred *c1, const struct cred *c2) | |
715 | { | |
716 | int i; | |
717 | ||
718 | if (!uid_eq(c1->fsuid, c2->fsuid)) | |
719 | return false; | |
720 | if (!gid_eq(c1->fsgid, c2->fsgid)) | |
721 | return false; | |
722 | if (c1->group_info == NULL || c2->group_info == NULL) | |
723 | return c1->group_info == c2->group_info; | |
724 | if (c1->group_info->ngroups != c2->group_info->ngroups) | |
725 | return false; | |
726 | for (i = 0; i < c1->group_info->ngroups; i++) { | |
727 | if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i])) | |
728 | return false; | |
729 | } | |
730 | return true; | |
731 | } | |
732 | ||
733 | static struct nfsd_file * | |
734 | nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, | |
5e113224 | 735 | unsigned int hashval, struct net *net) |
65294c1f JL |
736 | { |
737 | struct nfsd_file *nf; | |
738 | unsigned char need = may_flags & NFSD_FILE_MAY_MASK; | |
739 | ||
740 | hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, | |
741 | nf_node) { | |
742 | if ((need & nf->nf_may) != need) | |
743 | continue; | |
744 | if (nf->nf_inode != inode) | |
745 | continue; | |
5e113224 TM |
746 | if (nf->nf_net != net) |
747 | continue; | |
65294c1f JL |
748 | if (!nfsd_match_cred(nf->nf_cred, current_cred())) |
749 | continue; | |
750 | if (nfsd_file_get(nf) != NULL) | |
751 | return nf; | |
752 | } | |
753 | return NULL; | |
754 | } | |
755 | ||
756 | /** | |
757 | * nfsd_file_is_cached - are there any cached open files for this fh? | |
758 | * @inode: inode of the file to check | |
759 | * | |
760 | * Scan the hashtable for open files that match this fh. Returns true if there | |
761 | * are any, and false if not. | |
762 | */ | |
763 | bool | |
764 | nfsd_file_is_cached(struct inode *inode) | |
765 | { | |
766 | bool ret = false; | |
767 | struct nfsd_file *nf; | |
768 | unsigned int hashval; | |
769 | ||
770 | hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); | |
771 | ||
772 | rcu_read_lock(); | |
773 | hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head, | |
774 | nf_node) { | |
775 | if (inode == nf->nf_inode) { | |
776 | ret = true; | |
777 | break; | |
778 | } | |
779 | } | |
780 | rcu_read_unlock(); | |
781 | trace_nfsd_file_is_cached(inode, hashval, (int)ret); | |
782 | return ret; | |
783 | } | |
784 | ||
785 | __be32 | |
786 | nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, | |
787 | unsigned int may_flags, struct nfsd_file **pnf) | |
788 | { | |
789 | __be32 status; | |
5e113224 | 790 | struct net *net = SVC_NET(rqstp); |
65294c1f JL |
791 | struct nfsd_file *nf, *new; |
792 | struct inode *inode; | |
793 | unsigned int hashval; | |
794 | ||
795 | /* FIXME: skip this if fh_dentry is already set? */ | |
796 | status = fh_verify(rqstp, fhp, S_IFREG, | |
797 | may_flags|NFSD_MAY_OWNER_OVERRIDE); | |
798 | if (status != nfs_ok) | |
799 | return status; | |
800 | ||
801 | inode = d_inode(fhp->fh_dentry); | |
802 | hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS); | |
803 | retry: | |
804 | rcu_read_lock(); | |
5e113224 | 805 | nf = nfsd_file_find_locked(inode, may_flags, hashval, net); |
65294c1f JL |
806 | rcu_read_unlock(); |
807 | if (nf) | |
808 | goto wait_for_construction; | |
809 | ||
5e113224 | 810 | new = nfsd_file_alloc(inode, may_flags, hashval, net); |
65294c1f JL |
811 | if (!new) { |
812 | trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, | |
813 | NULL, nfserr_jukebox); | |
814 | return nfserr_jukebox; | |
815 | } | |
816 | ||
817 | spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
5e113224 | 818 | nf = nfsd_file_find_locked(inode, may_flags, hashval, net); |
65294c1f JL |
819 | if (nf == NULL) |
820 | goto open_file; | |
821 | spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
822 | nfsd_file_slab_free(&new->nf_rcu); | |
823 | ||
824 | wait_for_construction: | |
825 | wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); | |
826 | ||
827 | /* Did construction of this file fail? */ | |
828 | if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { | |
829 | nfsd_file_put_noref(nf); | |
830 | goto retry; | |
831 | } | |
832 | ||
833 | this_cpu_inc(nfsd_file_cache_hits); | |
834 | ||
835 | if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { | |
836 | bool write = (may_flags & NFSD_MAY_WRITE); | |
837 | ||
838 | if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || | |
839 | (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { | |
840 | status = nfserrno(nfsd_open_break_lease( | |
841 | file_inode(nf->nf_file), may_flags)); | |
842 | if (status == nfs_ok) { | |
843 | clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); | |
844 | if (write) | |
845 | clear_bit(NFSD_FILE_BREAK_WRITE, | |
846 | &nf->nf_flags); | |
847 | } | |
848 | } | |
849 | } | |
850 | out: | |
851 | if (status == nfs_ok) { | |
852 | *pnf = nf; | |
853 | } else { | |
854 | nfsd_file_put(nf); | |
855 | nf = NULL; | |
856 | } | |
857 | ||
858 | trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status); | |
859 | return status; | |
860 | open_file: | |
861 | nf = new; | |
862 | /* Take reference for the hashtable */ | |
863 | atomic_inc(&nf->nf_ref); | |
864 | __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); | |
865 | __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); | |
866 | list_lru_add(&nfsd_file_lru, &nf->nf_lru); | |
867 | hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); | |
868 | ++nfsd_file_hashtbl[hashval].nfb_count; | |
869 | nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount, | |
870 | nfsd_file_hashtbl[hashval].nfb_count); | |
871 | spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
872 | atomic_long_inc(&nfsd_filecache_count); | |
873 | ||
874 | nf->nf_mark = nfsd_file_mark_find_or_create(nf); | |
875 | if (nf->nf_mark) | |
876 | status = nfsd_open_verified(rqstp, fhp, S_IFREG, | |
877 | may_flags, &nf->nf_file); | |
878 | else | |
879 | status = nfserr_jukebox; | |
880 | /* | |
881 | * If construction failed, or we raced with a call to unlink() | |
882 | * then unhash. | |
883 | */ | |
884 | if (status != nfs_ok || inode->i_nlink == 0) { | |
885 | bool do_free; | |
886 | spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
887 | do_free = nfsd_file_unhash(nf); | |
888 | spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock); | |
889 | if (do_free) | |
890 | nfsd_file_put_noref(nf); | |
891 | } | |
892 | clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); | |
893 | smp_mb__after_atomic(); | |
894 | wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); | |
895 | goto out; | |
896 | } | |
897 | ||
898 | /* | |
899 | * Note that fields may be added, removed or reordered in the future. Programs | |
900 | * scraping this file for info should test the labels to ensure they're | |
901 | * getting the correct field. | |
902 | */ | |
903 | static int nfsd_file_cache_stats_show(struct seq_file *m, void *v) | |
904 | { | |
905 | unsigned int i, count = 0, longest = 0; | |
906 | unsigned long hits = 0; | |
907 | ||
908 | /* | |
909 | * No need for spinlocks here since we're not terribly interested in | |
910 | * accuracy. We do take the nfsd_mutex simply to ensure that we | |
911 | * don't end up racing with server shutdown | |
912 | */ | |
913 | mutex_lock(&nfsd_mutex); | |
914 | if (nfsd_file_hashtbl) { | |
915 | for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) { | |
916 | count += nfsd_file_hashtbl[i].nfb_count; | |
917 | longest = max(longest, nfsd_file_hashtbl[i].nfb_count); | |
918 | } | |
919 | } | |
920 | mutex_unlock(&nfsd_mutex); | |
921 | ||
922 | for_each_possible_cpu(i) | |
923 | hits += per_cpu(nfsd_file_cache_hits, i); | |
924 | ||
925 | seq_printf(m, "total entries: %u\n", count); | |
926 | seq_printf(m, "longest chain: %u\n", longest); | |
927 | seq_printf(m, "cache hits: %lu\n", hits); | |
928 | return 0; | |
929 | } | |
930 | ||
931 | int nfsd_file_cache_stats_open(struct inode *inode, struct file *file) | |
932 | { | |
933 | return single_open(file, nfsd_file_cache_stats_show, NULL); | |
934 | } |