blk-mq: don't schedule block kworker on isolated CPUs
[linux-block.git] / fs / readdir.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * linux/fs/readdir.c
4 *
5 * Copyright (C) 1995 Linus Torvalds
6 */
7
85c9fe8f 8#include <linux/stddef.h>
022a1692 9#include <linux/kernel.h>
630d9c47 10#include <linux/export.h>
1da177e4
LT
11#include <linux/time.h>
12#include <linux/mm.h>
13#include <linux/errno.h>
14#include <linux/stat.h>
15#include <linux/file.h>
1da177e4 16#include <linux/fs.h>
d4c7cf6c 17#include <linux/fsnotify.h>
1da177e4
LT
18#include <linux/dirent.h>
19#include <linux/security.h>
20#include <linux/syscalls.h>
21#include <linux/unistd.h>
0460b2a2 22#include <linux/compat.h>
7c0f6ba6 23#include <linux/uaccess.h>
1da177e4 24
9f79b78e
LT
25#include <asm/unaligned.h>
26
3e327154
LT
27/*
28 * Some filesystems were never converted to '->iterate_shared()'
29 * and their directory iterators want the inode lock held for
30 * writing. This wrapper allows for converting from the shared
31 * semantics to the exclusive inode use.
32 */
33int wrap_directory_iterator(struct file *file,
34 struct dir_context *ctx,
35 int (*iter)(struct file *, struct dir_context *))
36{
37 struct inode *inode = file_inode(file);
38 int ret;
39
40 /*
41 * We'd love to have an 'inode_upgrade_trylock()' operation,
42 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
43 *
44 * But considering this is for "filesystems that never got
45 * converted", it really doesn't matter.
46 *
47 * Also note that since we have to return with the lock held
48 * for reading, we can't use the "killable()" locking here,
49 * since we do need to get the lock even if we're dying.
50 *
51 * We could do the write part killably and then get the read
52 * lock unconditionally if it mattered, but see above on why
53 * this does the very simplistic conversion.
54 */
55 up_read(&inode->i_rwsem);
56 down_write(&inode->i_rwsem);
57
58 /*
59 * Since we dropped the inode lock, we should do the
60 * DEADDIR test again. See 'iterate_dir()' below.
61 *
62 * Note that we don't need to re-do the f_pos games,
63 * since the file must be locked wrt f_pos anyway.
64 */
65 ret = -ENOENT;
66 if (!IS_DEADDIR(inode))
67 ret = iter(file, ctx);
68
69 downgrade_write(&inode->i_rwsem);
70 return ret;
71}
72EXPORT_SYMBOL(wrap_directory_iterator);
73
9f79b78e
LT
/*
 * Copy a directory entry name of '_len' bytes from kernel memory
 * '_src' to user memory '_dst' and store a terminating NUL byte
 * at '_dst + _len'.
 *
 * Note the "unsafe_put_user()" semantics: we goto a
 * label for errors.
 *
 * Every user in this file invokes it between user_write_access_begin()
 * and user_write_access_end(). The macro arguments are evaluated once
 * each, into the locals below.
 */
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do {	\
	char __user *dst = (_dst);				\
	const char *src = (_src);				\
	size_t len = (_len);					\
	unsafe_put_user(0, dst+len, label);			\
	unsafe_copy_to_user(dst, src, len, label);		\
} while (0)
85
86
5c0ba4e0 87int iterate_dir(struct file *file, struct dir_context *ctx)
1da177e4 88{
496ad9aa 89 struct inode *inode = file_inode(file);
1da177e4 90 int res = -ENOTDIR;
3e327154
LT
91
92 if (!file->f_op->iterate_shared)
1da177e4
LT
93 goto out;
94
95 res = security_file_permission(file, MAY_READ);
96 if (res)
97 goto out;
98
d9e5d310
AG
99 res = fsnotify_file_perm(file, MAY_READ);
100 if (res)
101 goto out;
102
3e327154 103 res = down_read_killable(&inode->i_rwsem);
0dc208b5
KT
104 if (res)
105 goto out;
da784511 106
1da177e4
LT
107 res = -ENOENT;
108 if (!IS_DEADDIR(inode)) {
2233f31a 109 ctx->pos = file->f_pos;
3e327154 110 res = file->f_op->iterate_shared(file, ctx);
2233f31a 111 file->f_pos = ctx->pos;
d4c7cf6c 112 fsnotify_access(file);
1da177e4
LT
113 file_accessed(file);
114 }
3e327154 115 inode_unlock_shared(inode);
1da177e4
LT
116out:
117 return res;
118}
5c0ba4e0 119EXPORT_SYMBOL(iterate_dir);
1da177e4 120
8a23eb80
LT
/*
 * POSIX says that a dirent name cannot contain NUL or a '/'.
 *
 * It's not 100% clear what we should really do in this case.
 * The filesystem is clearly corrupted, but returning a hard
 * error means that you now don't see any of the other names
 * either, so that isn't a perfect alternative.
 *
 * And if you return an error, what error do you use? Several
 * filesystems seem to have decided on EUCLEAN being the error
 * code for EFSCORRUPTED, and that may be the error to use. Or
 * just EIO, which is perhaps more obvious to users.
 *
 * In order to see the other file names in the directory, the
 * caller might want to make this a "soft" error: skip the
 * entry, and return the error at the end instead.
 *
 * Note that this should likely do a "memchr(name, 0, len)"
 * check too, since that would be filesystem corruption as
 * well. However, that case can't actually confuse user space,
 * which has to do a strlen() on the name anyway to find the
 * filename length, and the above "soft error" worry means
 * that it's probably better left alone until we have that
 * issue clarified.
 *
 * Note the PATH_MAX check - it's arbitrary but the real
 * kernel limit on a possible path component, not NAME_MAX,
 * which is the technical standard limit.
 *
 * @name: entry name, not necessarily NUL-terminated
 * @len:  number of bytes in @name
 *
 * Returns 0 if the name is acceptable, or -EIO if @len is not in
 * the range 1..PATH_MAX-1 or the first @len bytes contain a '/'.
 */
static int verify_dirent_name(const char *name, int len)
{
	/* Reject the length first so memchr() never sees a bogus size. */
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}
158
1da177e4
LT
159/*
160 * Traditional linux readdir() handling..
161 *
162 * "count=1" is a special case, meaning that the buffer is one
163 * dirent-structure in size and that the code can't handle more
164 * anyway. Thus the special "fillonedir()" function for that
165 * case (the low-level handlers don't need to care about this).
166 */
1da177e4
LT
167
168#ifdef __ARCH_WANT_OLD_READDIR
169
/*
 * User-space record filled in by fillonedir() for the old readdir
 * system call: inode number, directory offset, name length, and the
 * NUL-terminated name as a trailing flexible array.
 */
struct old_linux_dirent {
	unsigned long	d_ino;
	unsigned long	d_offset;
	unsigned short	d_namlen;
	char		d_name[];
};
176
177struct readdir_callback {
5c0ba4e0 178 struct dir_context ctx;
1da177e4
LT
179 struct old_linux_dirent __user * dirent;
180 int result;
181};
182
25885a35 183static bool fillonedir(struct dir_context *ctx, const char *name, int namlen,
ac7576f4 184 loff_t offset, u64 ino, unsigned int d_type)
1da177e4 185{
ac7576f4
MS
186 struct readdir_callback *buf =
187 container_of(ctx, struct readdir_callback, ctx);
1da177e4 188 struct old_linux_dirent __user * dirent;
afefdbb2 189 unsigned long d_ino;
1da177e4
LT
190
191 if (buf->result)
25885a35 192 return false;
0c93ac69 193 buf->result = verify_dirent_name(name, namlen);
25885a35
AV
194 if (buf->result)
195 return false;
afefdbb2 196 d_ino = ino;
8f3f655d
AV
197 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
198 buf->result = -EOVERFLOW;
25885a35 199 return false;
8f3f655d 200 }
1da177e4
LT
201 buf->result++;
202 dirent = buf->dirent;
391b7461 203 if (!user_write_access_begin(dirent,
1da177e4
LT
204 (unsigned long)(dirent->d_name + namlen + 1) -
205 (unsigned long)dirent))
206 goto efault;
391b7461
AV
207 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
208 unsafe_put_user(offset, &dirent->d_offset, efault_end);
209 unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
210 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
211 user_write_access_end();
25885a35 212 return true;
391b7461
AV
213efault_end:
214 user_write_access_end();
1da177e4
LT
215efault:
216 buf->result = -EFAULT;
25885a35 217 return false;
1da177e4
LT
218}
219
d4e82042
HC
220SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
221 struct old_linux_dirent __user *, dirent, unsigned int, count)
1da177e4
LT
222{
223 int error;
63b6df14 224 struct fd f = fdget_pos(fd);
ac6614b7
AV
225 struct readdir_callback buf = {
226 .ctx.actor = fillonedir,
227 .dirent = dirent
228 };
1da177e4 229
2903ff01 230 if (!f.file)
863ced7f 231 return -EBADF;
1da177e4 232
5c0ba4e0 233 error = iterate_dir(f.file, &buf.ctx);
53c9c5c0 234 if (buf.result)
1da177e4
LT
235 error = buf.result;
236
63b6df14 237 fdput_pos(f);
1da177e4
LT
238 return error;
239}
240
241#endif /* __ARCH_WANT_OLD_READDIR */
242
/*
 * New, all-improved, singing, dancing, iBCS2-compliant getdents()
 * interface.
 *
 * Record layout written by filldir(): inode number, offset, total
 * record length, then the NUL-terminated name. filldir() also stores
 * the entry type in the last byte of each record.
 */
struct linux_dirent {
	unsigned long	d_ino;
	unsigned long	d_off;
	unsigned short	d_reclen;
	char		d_name[];
};
253
254struct getdents_callback {
5c0ba4e0 255 struct dir_context ctx;
1da177e4 256 struct linux_dirent __user * current_dir;
3c2659bd 257 int prev_reclen;
1da177e4
LT
258 int count;
259 int error;
260};
261
25885a35 262static bool filldir(struct dir_context *ctx, const char *name, int namlen,
ac7576f4 263 loff_t offset, u64 ino, unsigned int d_type)
1da177e4 264{
3c2659bd 265 struct linux_dirent __user *dirent, *prev;
ac7576f4
MS
266 struct getdents_callback *buf =
267 container_of(ctx, struct getdents_callback, ctx);
afefdbb2 268 unsigned long d_ino;
85c9fe8f
KW
269 int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
270 sizeof(long));
3c2659bd 271 int prev_reclen;
1da177e4 272
8a23eb80
LT
273 buf->error = verify_dirent_name(name, namlen);
274 if (unlikely(buf->error))
25885a35 275 return false;
1da177e4
LT
276 buf->error = -EINVAL; /* only used if we fail.. */
277 if (reclen > buf->count)
25885a35 278 return false;
afefdbb2 279 d_ino = ino;
8f3f655d
AV
280 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
281 buf->error = -EOVERFLOW;
25885a35 282 return false;
8f3f655d 283 }
3c2659bd
LT
284 prev_reclen = buf->prev_reclen;
285 if (prev_reclen && signal_pending(current))
25885a35 286 return false;
9f79b78e 287 dirent = buf->current_dir;
3c2659bd 288 prev = (void __user *) dirent - prev_reclen;
41cd7805 289 if (!user_write_access_begin(prev, reclen + prev_reclen))
3c2659bd
LT
290 goto efault;
291
292 /* This might be 'dirent->d_off', but if so it will get overwritten */
293 unsafe_put_user(offset, &prev->d_off, efault_end);
9f79b78e
LT
294 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
295 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
296 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
297 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
41cd7805 298 user_write_access_end();
9f79b78e 299
3c2659bd
LT
300 buf->current_dir = (void __user *)dirent + reclen;
301 buf->prev_reclen = reclen;
1da177e4 302 buf->count -= reclen;
25885a35 303 return true;
9f79b78e 304efault_end:
41cd7805 305 user_write_access_end();
1da177e4
LT
306efault:
307 buf->error = -EFAULT;
25885a35 308 return false;
1da177e4
LT
309}
310
20f37034
HC
311SYSCALL_DEFINE3(getdents, unsigned int, fd,
312 struct linux_dirent __user *, dirent, unsigned int, count)
1da177e4 313{
2903ff01 314 struct fd f;
ac6614b7
AV
315 struct getdents_callback buf = {
316 .ctx.actor = filldir,
317 .count = count,
318 .current_dir = dirent
319 };
1da177e4
LT
320 int error;
321
63b6df14 322 f = fdget_pos(fd);
2903ff01 323 if (!f.file)
863ced7f 324 return -EBADF;
1da177e4 325
5c0ba4e0 326 error = iterate_dir(f.file, &buf.ctx);
53c9c5c0
AV
327 if (error >= 0)
328 error = buf.error;
3c2659bd
LT
329 if (buf.prev_reclen) {
330 struct linux_dirent __user * lastdirent;
331 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
332
bb6f619b 333 if (put_user(buf.ctx.pos, &lastdirent->d_off))
1da177e4
LT
334 error = -EFAULT;
335 else
336 error = count - buf.count;
337 }
63b6df14 338 fdput_pos(f);
1da177e4
LT
339 return error;
340}
341
1da177e4 342struct getdents_callback64 {
5c0ba4e0 343 struct dir_context ctx;
1da177e4 344 struct linux_dirent64 __user * current_dir;
3c2659bd 345 int prev_reclen;
1da177e4
LT
346 int count;
347 int error;
348};
349
25885a35 350static bool filldir64(struct dir_context *ctx, const char *name, int namlen,
ac7576f4 351 loff_t offset, u64 ino, unsigned int d_type)
1da177e4 352{
3c2659bd 353 struct linux_dirent64 __user *dirent, *prev;
ac7576f4
MS
354 struct getdents_callback64 *buf =
355 container_of(ctx, struct getdents_callback64, ctx);
85c9fe8f
KW
356 int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
357 sizeof(u64));
3c2659bd 358 int prev_reclen;
1da177e4 359
8a23eb80
LT
360 buf->error = verify_dirent_name(name, namlen);
361 if (unlikely(buf->error))
25885a35 362 return false;
1da177e4
LT
363 buf->error = -EINVAL; /* only used if we fail.. */
364 if (reclen > buf->count)
25885a35 365 return false;
3c2659bd
LT
366 prev_reclen = buf->prev_reclen;
367 if (prev_reclen && signal_pending(current))
25885a35 368 return false;
9f79b78e 369 dirent = buf->current_dir;
3c2659bd 370 prev = (void __user *)dirent - prev_reclen;
41cd7805 371 if (!user_write_access_begin(prev, reclen + prev_reclen))
3c2659bd
LT
372 goto efault;
373
374 /* This might be 'dirent->d_off', but if so it will get overwritten */
375 unsafe_put_user(offset, &prev->d_off, efault_end);
9f79b78e
LT
376 unsafe_put_user(ino, &dirent->d_ino, efault_end);
377 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
378 unsafe_put_user(d_type, &dirent->d_type, efault_end);
379 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
41cd7805 380 user_write_access_end();
9f79b78e 381
3c2659bd
LT
382 buf->prev_reclen = reclen;
383 buf->current_dir = (void __user *)dirent + reclen;
1da177e4 384 buf->count -= reclen;
25885a35 385 return true;
3c2659bd 386
9f79b78e 387efault_end:
41cd7805 388 user_write_access_end();
1da177e4
LT
389efault:
390 buf->error = -EFAULT;
25885a35 391 return false;
1da177e4
LT
392}
393
fb2da16c
CH
394SYSCALL_DEFINE3(getdents64, unsigned int, fd,
395 struct linux_dirent64 __user *, dirent, unsigned int, count)
1da177e4 396{
2903ff01 397 struct fd f;
ac6614b7
AV
398 struct getdents_callback64 buf = {
399 .ctx.actor = filldir64,
400 .count = count,
401 .current_dir = dirent
402 };
1da177e4
LT
403 int error;
404
63b6df14 405 f = fdget_pos(fd);
2903ff01 406 if (!f.file)
863ced7f 407 return -EBADF;
1da177e4 408
5c0ba4e0 409 error = iterate_dir(f.file, &buf.ctx);
53c9c5c0
AV
410 if (error >= 0)
411 error = buf.error;
3c2659bd
LT
412 if (buf.prev_reclen) {
413 struct linux_dirent64 __user * lastdirent;
bb6f619b 414 typeof(lastdirent->d_off) d_off = buf.ctx.pos;
3c2659bd
LT
415
416 lastdirent = (void __user *) buf.current_dir - buf.prev_reclen;
5fb15141 417 if (put_user(d_off, &lastdirent->d_off))
53c9c5c0
AV
418 error = -EFAULT;
419 else
420 error = count - buf.count;
1da177e4 421 }
63b6df14 422 fdput_pos(f);
1da177e4
LT
423 return error;
424}
0460b2a2
AV
425
426#ifdef CONFIG_COMPAT
427struct compat_old_linux_dirent {
428 compat_ulong_t d_ino;
429 compat_ulong_t d_offset;
430 unsigned short d_namlen;
2507135e 431 char d_name[];
0460b2a2
AV
432};
433
434struct compat_readdir_callback {
435 struct dir_context ctx;
436 struct compat_old_linux_dirent __user *dirent;
437 int result;
438};
439
25885a35 440static bool compat_fillonedir(struct dir_context *ctx, const char *name,
0460b2a2
AV
441 int namlen, loff_t offset, u64 ino,
442 unsigned int d_type)
443{
444 struct compat_readdir_callback *buf =
445 container_of(ctx, struct compat_readdir_callback, ctx);
446 struct compat_old_linux_dirent __user *dirent;
447 compat_ulong_t d_ino;
448
449 if (buf->result)
25885a35 450 return false;
0c93ac69 451 buf->result = verify_dirent_name(name, namlen);
25885a35
AV
452 if (buf->result)
453 return false;
0460b2a2
AV
454 d_ino = ino;
455 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
456 buf->result = -EOVERFLOW;
25885a35 457 return false;
0460b2a2
AV
458 }
459 buf->result++;
460 dirent = buf->dirent;
391b7461 461 if (!user_write_access_begin(dirent,
0460b2a2
AV
462 (unsigned long)(dirent->d_name + namlen + 1) -
463 (unsigned long)dirent))
464 goto efault;
391b7461
AV
465 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
466 unsafe_put_user(offset, &dirent->d_offset, efault_end);
467 unsafe_put_user(namlen, &dirent->d_namlen, efault_end);
468 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
469 user_write_access_end();
25885a35 470 return true;
391b7461
AV
471efault_end:
472 user_write_access_end();
0460b2a2
AV
473efault:
474 buf->result = -EFAULT;
25885a35 475 return false;
0460b2a2
AV
476}
477
478COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
479 struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
480{
481 int error;
482 struct fd f = fdget_pos(fd);
483 struct compat_readdir_callback buf = {
484 .ctx.actor = compat_fillonedir,
485 .dirent = dirent
486 };
487
488 if (!f.file)
489 return -EBADF;
490
491 error = iterate_dir(f.file, &buf.ctx);
492 if (buf.result)
493 error = buf.result;
494
495 fdput_pos(f);
496 return error;
497}
498
499struct compat_linux_dirent {
500 compat_ulong_t d_ino;
501 compat_ulong_t d_off;
502 unsigned short d_reclen;
2507135e 503 char d_name[];
0460b2a2
AV
504};
505
506struct compat_getdents_callback {
507 struct dir_context ctx;
508 struct compat_linux_dirent __user *current_dir;
82af599b 509 int prev_reclen;
0460b2a2
AV
510 int count;
511 int error;
512};
513
25885a35 514static bool compat_filldir(struct dir_context *ctx, const char *name, int namlen,
0460b2a2
AV
515 loff_t offset, u64 ino, unsigned int d_type)
516{
82af599b 517 struct compat_linux_dirent __user *dirent, *prev;
0460b2a2
AV
518 struct compat_getdents_callback *buf =
519 container_of(ctx, struct compat_getdents_callback, ctx);
520 compat_ulong_t d_ino;
521 int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
522 namlen + 2, sizeof(compat_long_t));
82af599b 523 int prev_reclen;
0460b2a2 524
82af599b
AV
525 buf->error = verify_dirent_name(name, namlen);
526 if (unlikely(buf->error))
25885a35 527 return false;
0460b2a2
AV
528 buf->error = -EINVAL; /* only used if we fail.. */
529 if (reclen > buf->count)
25885a35 530 return false;
0460b2a2
AV
531 d_ino = ino;
532 if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
533 buf->error = -EOVERFLOW;
25885a35 534 return false;
0460b2a2 535 }
82af599b
AV
536 prev_reclen = buf->prev_reclen;
537 if (prev_reclen && signal_pending(current))
25885a35 538 return false;
0460b2a2 539 dirent = buf->current_dir;
82af599b
AV
540 prev = (void __user *) dirent - prev_reclen;
541 if (!user_write_access_begin(prev, reclen + prev_reclen))
0460b2a2 542 goto efault;
82af599b
AV
543
544 unsafe_put_user(offset, &prev->d_off, efault_end);
545 unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
546 unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
547 unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
548 unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
549 user_write_access_end();
550
551 buf->prev_reclen = reclen;
552 buf->current_dir = (void __user *)dirent + reclen;
0460b2a2 553 buf->count -= reclen;
25885a35 554 return true;
82af599b
AV
555efault_end:
556 user_write_access_end();
0460b2a2
AV
557efault:
558 buf->error = -EFAULT;
25885a35 559 return false;
0460b2a2
AV
560}
561
562COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
563 struct compat_linux_dirent __user *, dirent, unsigned int, count)
564{
565 struct fd f;
0460b2a2
AV
566 struct compat_getdents_callback buf = {
567 .ctx.actor = compat_filldir,
568 .current_dir = dirent,
569 .count = count
570 };
571 int error;
572
0460b2a2
AV
573 f = fdget_pos(fd);
574 if (!f.file)
575 return -EBADF;
576
577 error = iterate_dir(f.file, &buf.ctx);
578 if (error >= 0)
579 error = buf.error;
82af599b
AV
580 if (buf.prev_reclen) {
581 struct compat_linux_dirent __user * lastdirent;
582 lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
583
0460b2a2
AV
584 if (put_user(buf.ctx.pos, &lastdirent->d_off))
585 error = -EFAULT;
586 else
587 error = count - buf.count;
588 }
589 fdput_pos(f);
590 return error;
591}
592#endif