fuse: get rid of ff->readdir.lock
[linux-2.6-block.git] / fs / fuse / readdir.c
CommitLineData
d123d8e1
MS
1/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9
10#include "fuse_i.h"
261aaba7 11#include <linux/iversion.h>
d123d8e1 12#include <linux/posix_acl.h>
69e34551
MS
13#include <linux/pagemap.h>
14#include <linux/highmem.h>
d123d8e1
MS
15
16static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17{
18 struct fuse_conn *fc = get_fuse_conn(dir);
19 struct fuse_inode *fi = get_fuse_inode(dir);
20
21 if (!fc->do_readdirplus)
22 return false;
23 if (!fc->readdirplus_auto)
24 return true;
25 if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 return true;
27 if (ctx->pos == 0)
28 return true;
29 return false;
30}
31
69e34551
MS
32static void fuse_add_dirent_to_cache(struct file *file,
33 struct fuse_dirent *dirent, loff_t pos)
34{
35 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 pgoff_t index;
38 struct page *page;
39 loff_t size;
3494927e 40 u64 version;
69e34551
MS
41 unsigned int offset;
42 void *addr;
43
44 spin_lock(&fi->rdc.lock);
45 /*
46 * Is cache already completed? Or this entry does not go at the end of
47 * cache?
48 */
49 if (fi->rdc.cached || pos != fi->rdc.pos) {
50 spin_unlock(&fi->rdc.lock);
51 return;
52 }
3494927e 53 version = fi->rdc.version;
69e34551
MS
54 size = fi->rdc.size;
55 offset = size & ~PAGE_MASK;
56 index = size >> PAGE_SHIFT;
57 /* Dirent doesn't fit in current page? Jump to next page. */
58 if (offset + reclen > PAGE_SIZE) {
59 index++;
60 offset = 0;
61 }
62 spin_unlock(&fi->rdc.lock);
63
64 if (offset) {
65 page = find_lock_page(file->f_mapping, index);
66 } else {
67 page = find_or_create_page(file->f_mapping, index,
68 mapping_gfp_mask(file->f_mapping));
69 }
70 if (!page)
71 return;
72
73 spin_lock(&fi->rdc.lock);
74 /* Raced with another readdir */
3494927e
MS
75 if (fi->rdc.version != version || fi->rdc.size != size ||
76 WARN_ON(fi->rdc.pos != pos))
69e34551
MS
77 goto unlock;
78
5fe0fc9f 79 addr = kmap_local_page(page);
9fa248c6 80 if (!offset) {
69e34551 81 clear_page(addr);
9fa248c6
MS
82 SetPageUptodate(page);
83 }
69e34551 84 memcpy(addr + offset, dirent, reclen);
5fe0fc9f 85 kunmap_local(addr);
69e34551
MS
86 fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87 fi->rdc.pos = dirent->off;
88unlock:
89 spin_unlock(&fi->rdc.lock);
90 unlock_page(page);
91 put_page(page);
92}
93
94static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95{
96 struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 loff_t end;
98
99 spin_lock(&fi->rdc.lock);
100 /* does cache end position match current position? */
101 if (fi->rdc.pos != pos) {
102 spin_unlock(&fi->rdc.lock);
103 return;
104 }
105
106 fi->rdc.cached = true;
107 end = ALIGN(fi->rdc.size, PAGE_SIZE);
108 spin_unlock(&fi->rdc.lock);
109
110 /* truncate unused tail of cache */
111 truncate_inode_pages(file->f_mapping, end);
112}
113
18172b10
MS
114static bool fuse_emit(struct file *file, struct dir_context *ctx,
115 struct fuse_dirent *dirent)
116{
69e34551
MS
117 struct fuse_file *ff = file->private_data;
118
119 if (ff->open_flags & FOPEN_CACHE_DIR)
120 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121
18172b10
MS
122 return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123 dirent->type);
124}
125
d123d8e1
MS
126static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127 struct dir_context *ctx)
128{
129 while (nbytes >= FUSE_NAME_OFFSET) {
130 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131 size_t reclen = FUSE_DIRENT_SIZE(dirent);
132 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133 return -EIO;
134 if (reclen > nbytes)
135 break;
136 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 return -EIO;
138
18172b10 139 if (!fuse_emit(file, ctx, dirent))
d123d8e1
MS
140 break;
141
142 buf += reclen;
143 nbytes -= reclen;
144 ctx->pos = dirent->off;
145 }
146
147 return 0;
148}
149
150static int fuse_direntplus_link(struct file *file,
151 struct fuse_direntplus *direntplus,
152 u64 attr_version)
153{
154 struct fuse_entry_out *o = &direntplus->entry_out;
155 struct fuse_dirent *dirent = &direntplus->dirent;
156 struct dentry *parent = file->f_path.dentry;
157 struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158 struct dentry *dentry;
159 struct dentry *alias;
160 struct inode *dir = d_inode(parent);
161 struct fuse_conn *fc;
162 struct inode *inode;
163 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164
165 if (!o->nodeid) {
166 /*
167 * Unlike in the case of fuse_lookup, zero nodeid does not mean
168 * ENOENT. Instead, it only means the userspace filesystem did
169 * not want to return attributes/handle for this entry.
170 *
171 * So do nothing.
172 */
173 return 0;
174 }
175
176 if (name.name[0] == '.') {
177 /*
178 * We could potentially refresh the attributes of the directory
179 * and its parent?
180 */
181 if (name.len == 1)
182 return 0;
183 if (name.name[1] == '.' && name.len == 2)
184 return 0;
185 }
186
187 if (invalid_nodeid(o->nodeid))
188 return -EIO;
eb59bd17 189 if (fuse_invalid_attr(&o->attr))
d123d8e1
MS
190 return -EIO;
191
192 fc = get_fuse_conn(dir);
193
194 name.hash = full_name_hash(parent, name.name, name.len);
195 dentry = d_lookup(parent, &name);
196 if (!dentry) {
197retry:
198 dentry = d_alloc_parallel(parent, &name, &wq);
199 if (IS_ERR(dentry))
200 return PTR_ERR(dentry);
201 }
202 if (!d_in_lookup(dentry)) {
203 struct fuse_inode *fi;
204 inode = d_inode(dentry);
15db1683
AG
205 if (inode && get_node_id(inode) != o->nodeid)
206 inode = NULL;
d123d8e1 207 if (!inode ||
15db1683
AG
208 fuse_stale_inode(inode, o->generation, &o->attr)) {
209 if (inode)
210 fuse_make_bad(inode);
d123d8e1
MS
211 d_invalidate(dentry);
212 dput(dentry);
213 goto retry;
214 }
5d069dbe 215 if (fuse_is_bad(inode)) {
d123d8e1
MS
216 dput(dentry);
217 return -EIO;
218 }
219
220 fi = get_fuse_inode(inode);
c9d8f5f0 221 spin_lock(&fi->lock);
d123d8e1 222 fi->nlookup++;
c9d8f5f0 223 spin_unlock(&fi->lock);
d123d8e1
MS
224
225 forget_all_cached_acls(inode);
972f4c46 226 fuse_change_attributes(inode, &o->attr, NULL,
9dc10a54 227 ATTR_TIMEOUT(o),
d123d8e1
MS
228 attr_version);
229 /*
230 * The other branch comes via fuse_iget()
231 * which bumps nlookup inside
232 */
233 } else {
234 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
9dc10a54 235 &o->attr, ATTR_TIMEOUT(o),
d123d8e1
MS
236 attr_version);
237 if (!inode)
238 inode = ERR_PTR(-ENOMEM);
239
240 alias = d_splice_alias(inode, dentry);
241 d_lookup_done(dentry);
242 if (alias) {
243 dput(dentry);
244 dentry = alias;
245 }
b8bd342d 246 if (IS_ERR(dentry)) {
247 if (!IS_ERR(inode)) {
248 struct fuse_inode *fi = get_fuse_inode(inode);
249
250 spin_lock(&fi->lock);
251 fi->nlookup--;
252 spin_unlock(&fi->lock);
253 }
d123d8e1 254 return PTR_ERR(dentry);
b8bd342d 255 }
d123d8e1
MS
256 }
257 if (fc->readdirplus_auto)
258 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
259 fuse_change_entry_timeout(dentry, o);
260
261 dput(dentry);
262 return 0;
263}
264
3545fe21
MS
265static void fuse_force_forget(struct file *file, u64 nodeid)
266{
267 struct inode *inode = file_inode(file);
fcee216b 268 struct fuse_mount *fm = get_fuse_mount(inode);
3545fe21
MS
269 struct fuse_forget_in inarg;
270 FUSE_ARGS(args);
271
272 memset(&inarg, 0, sizeof(inarg));
273 inarg.nlookup = 1;
274 args.opcode = FUSE_FORGET;
275 args.nodeid = nodeid;
276 args.in_numargs = 1;
277 args.in_args[0].size = sizeof(inarg);
278 args.in_args[0].value = &inarg;
279 args.force = true;
280 args.noreply = true;
281
fcee216b 282 fuse_simple_request(fm, &args);
3545fe21
MS
283 /* ignore errors */
284}
285
d123d8e1
MS
286static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
287 struct dir_context *ctx, u64 attr_version)
288{
289 struct fuse_direntplus *direntplus;
290 struct fuse_dirent *dirent;
291 size_t reclen;
292 int over = 0;
293 int ret;
294
295 while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
296 direntplus = (struct fuse_direntplus *) buf;
297 dirent = &direntplus->dirent;
298 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
299
300 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
301 return -EIO;
302 if (reclen > nbytes)
303 break;
304 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
305 return -EIO;
306
307 if (!over) {
308 /* We fill entries into dstbuf only as much as
309 it can hold. But we still continue iterating
310 over remaining entries to link them. If not,
311 we need to send a FORGET for each of those
312 which we did not link.
313 */
18172b10 314 over = !fuse_emit(file, ctx, dirent);
d123d8e1
MS
315 if (!over)
316 ctx->pos = dirent->off;
317 }
318
319 buf += reclen;
320 nbytes -= reclen;
321
322 ret = fuse_direntplus_link(file, direntplus, attr_version);
323 if (ret)
324 fuse_force_forget(file, direntplus->entry_out.nodeid);
325 }
326
327 return 0;
328}
329
5d7bc7e8 330static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
d123d8e1 331{
43f5098e
MS
332 int plus;
333 ssize_t res;
d123d8e1
MS
334 struct page *page;
335 struct inode *inode = file_inode(file);
fcee216b 336 struct fuse_mount *fm = get_fuse_mount(inode);
43f5098e
MS
337 struct fuse_io_args ia = {};
338 struct fuse_args_pages *ap = &ia.ap;
339 struct fuse_page_desc desc = { .length = PAGE_SIZE };
d123d8e1
MS
340 u64 attr_version = 0;
341 bool locked;
342
d123d8e1 343 page = alloc_page(GFP_KERNEL);
43f5098e 344 if (!page)
d123d8e1 345 return -ENOMEM;
d123d8e1
MS
346
347 plus = fuse_use_readdirplus(inode, ctx);
cabdb4fa 348 ap->args.out_pages = true;
43f5098e
MS
349 ap->num_pages = 1;
350 ap->pages = &page;
351 ap->descs = &desc;
d123d8e1 352 if (plus) {
fcee216b 353 attr_version = fuse_get_attr_version(fm->fc);
43f5098e
MS
354 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
355 FUSE_READDIRPLUS);
d123d8e1 356 } else {
43f5098e
MS
357 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
358 FUSE_READDIR);
d123d8e1
MS
359 }
360 locked = fuse_lock_inode(inode);
fcee216b 361 res = fuse_simple_request(fm, &ap->args);
d123d8e1 362 fuse_unlock_inode(inode, locked);
43f5098e
MS
363 if (res >= 0) {
364 if (!res) {
69e34551
MS
365 struct fuse_file *ff = file->private_data;
366
367 if (ff->open_flags & FOPEN_CACHE_DIR)
368 fuse_readdir_cache_end(file, ctx->pos);
369 } else if (plus) {
43f5098e 370 res = parse_dirplusfile(page_address(page), res,
d123d8e1
MS
371 file, ctx, attr_version);
372 } else {
43f5098e 373 res = parse_dirfile(page_address(page), res, file,
d123d8e1
MS
374 ctx);
375 }
376 }
377
378 __free_page(page);
379 fuse_invalidate_atime(inode);
43f5098e 380 return res;
d123d8e1 381}
5d7bc7e8
MS
382
/* Outcome of scanning one page of the readdir cache. */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* No entry at the requested position was seen in this page. */
	FOUND_NONE = 0,
	/* The requested position was reached; the end of the page data was hit. */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry: the caller's buffer is full. */
	FOUND_ALL = 2,
};
389
390static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
391 void *addr, unsigned int size,
392 struct dir_context *ctx)
393{
394 unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
395 enum fuse_parse_result res = FOUND_NONE;
396
397 WARN_ON(offset >= size);
398
399 for (;;) {
400 struct fuse_dirent *dirent = addr + offset;
401 unsigned int nbytes = size - offset;
e5854b1c 402 size_t reclen;
5d7bc7e8
MS
403
404 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
405 break;
406
e5854b1c
TH
407 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
408
5d7bc7e8
MS
409 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
410 return FOUND_ERR;
411 if (WARN_ON(reclen > nbytes))
412 return FOUND_ERR;
413 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
414 return FOUND_ERR;
415
416 if (ff->readdir.pos == ctx->pos) {
417 res = FOUND_SOME;
418 if (!dir_emit(ctx, dirent->name, dirent->namelen,
419 dirent->ino, dirent->type))
420 return FOUND_ALL;
421 ctx->pos = dirent->off;
422 }
423 ff->readdir.pos = dirent->off;
424 ff->readdir.cache_off += reclen;
425
426 offset += reclen;
427 }
428
429 return res;
430}
431
7118883b 432static void fuse_rdc_reset(struct inode *inode)
3494927e 433{
7118883b
MS
434 struct fuse_inode *fi = get_fuse_inode(inode);
435
3494927e
MS
436 fi->rdc.cached = false;
437 fi->rdc.version++;
438 fi->rdc.size = 0;
439 fi->rdc.pos = 0;
440}
441
5d7bc7e8
MS
442#define UNCACHED 1
443
444static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
445{
446 struct fuse_file *ff = file->private_data;
447 struct inode *inode = file_inode(file);
7118883b 448 struct fuse_conn *fc = get_fuse_conn(inode);
5d7bc7e8
MS
449 struct fuse_inode *fi = get_fuse_inode(inode);
450 enum fuse_parse_result res;
451 pgoff_t index;
452 unsigned int size;
453 struct page *page;
454 void *addr;
455
456 /* Seeked? If so, reset the cache stream */
457 if (ff->readdir.pos != ctx->pos) {
458 ff->readdir.pos = 0;
459 ff->readdir.cache_off = 0;
460 }
461
7118883b
MS
462 /*
463 * We're just about to start reading into the cache or reading the
464 * cache; both cases require an up-to-date mtime value.
465 */
466 if (!ctx->pos && fc->auto_inval_data) {
c6c745b8 467 int err = fuse_update_attributes(inode, file, STATX_MTIME);
7118883b
MS
468
469 if (err)
470 return err;
471 }
472
5d7bc7e8
MS
473retry:
474 spin_lock(&fi->rdc.lock);
7118883b 475retry_locked:
5d7bc7e8 476 if (!fi->rdc.cached) {
7118883b
MS
477 /* Starting cache? Set cache mtime. */
478 if (!ctx->pos && !fi->rdc.size) {
3c0d5df2 479 fi->rdc.mtime = inode_get_mtime(inode);
261aaba7 480 fi->rdc.iversion = inode_query_iversion(inode);
7118883b 481 }
5d7bc7e8
MS
482 spin_unlock(&fi->rdc.lock);
483 return UNCACHED;
484 }
7118883b
MS
485 /*
486 * When at the beginning of the directory (i.e. just after opendir(3) or
487 * rewinddir(3)), then need to check whether directory contents have
488 * changed, and reset the cache if so.
489 */
490 if (!ctx->pos) {
3c0d5df2
JL
491 struct timespec64 mtime = inode_get_mtime(inode);
492
261aaba7 493 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
3c0d5df2 494 !timespec64_equal(&fi->rdc.mtime, &mtime)) {
7118883b
MS
495 fuse_rdc_reset(inode);
496 goto retry_locked;
497 }
498 }
499
3494927e
MS
500 /*
501 * If cache version changed since the last getdents() call, then reset
502 * the cache stream.
503 */
504 if (ff->readdir.version != fi->rdc.version) {
505 ff->readdir.pos = 0;
506 ff->readdir.cache_off = 0;
507 }
508 /*
509 * If at the beginning of the cache, than reset version to
510 * current.
511 */
512 if (ff->readdir.pos == 0)
513 ff->readdir.version = fi->rdc.version;
514
5d7bc7e8
MS
515 WARN_ON(fi->rdc.size < ff->readdir.cache_off);
516
517 index = ff->readdir.cache_off >> PAGE_SHIFT;
518
519 if (index == (fi->rdc.size >> PAGE_SHIFT))
520 size = fi->rdc.size & ~PAGE_MASK;
521 else
522 size = PAGE_SIZE;
523 spin_unlock(&fi->rdc.lock);
524
525 /* EOF? */
526 if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
527 return 0;
528
529 page = find_get_page_flags(file->f_mapping, index,
530 FGP_ACCESSED | FGP_LOCK);
9fa248c6
MS
531 /* Page gone missing, then re-added to cache, but not initialized? */
532 if (page && !PageUptodate(page)) {
533 unlock_page(page);
534 put_page(page);
535 page = NULL;
536 }
3494927e 537 spin_lock(&fi->rdc.lock);
5d7bc7e8
MS
538 if (!page) {
539 /*
540 * Uh-oh: page gone missing, cache is useless
541 */
3494927e 542 if (fi->rdc.version == ff->readdir.version)
7118883b
MS
543 fuse_rdc_reset(inode);
544 goto retry_locked;
5d7bc7e8
MS
545 }
546
3494927e
MS
547 /* Make sure it's still the same version after getting the page. */
548 if (ff->readdir.version != fi->rdc.version) {
549 spin_unlock(&fi->rdc.lock);
550 unlock_page(page);
551 put_page(page);
552 goto retry;
553 }
554 spin_unlock(&fi->rdc.lock);
555
556 /*
557 * Contents of the page are now protected against changing by holding
558 * the page lock.
559 */
a1db2f7e 560 addr = kmap_local_page(page);
5d7bc7e8 561 res = fuse_parse_cache(ff, addr, size, ctx);
a1db2f7e 562 kunmap_local(addr);
5d7bc7e8
MS
563 unlock_page(page);
564 put_page(page);
565
566 if (res == FOUND_ERR)
567 return -EIO;
568
569 if (res == FOUND_ALL)
570 return 0;
571
572 if (size == PAGE_SIZE) {
573 /* We hit end of page: skip to next page. */
574 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
575 goto retry;
576 }
577
578 /*
579 * End of cache reached. If found position, then we are done, otherwise
580 * need to fall back to uncached, since the position we were looking for
581 * wasn't in the cache.
582 */
583 return res == FOUND_SOME ? 0 : UNCACHED;
584}
585
586int fuse_readdir(struct file *file, struct dir_context *ctx)
587{
588 struct fuse_file *ff = file->private_data;
589 struct inode *inode = file_inode(file);
590 int err;
591
5d069dbe 592 if (fuse_is_bad(inode))
5d7bc7e8
MS
593 return -EIO;
594
5d7bc7e8
MS
595 err = UNCACHED;
596 if (ff->open_flags & FOPEN_CACHE_DIR)
597 err = fuse_readdir_cached(file, ctx);
598 if (err == UNCACHED)
599 err = fuse_readdir_uncached(file, ctx);
600
5d7bc7e8
MS
601 return err;
602}