blk-mq: don't count completed flush data request as inflight in case of quiesce
[linux-block.git] / fs / fsopen.c
CommitLineData
b4d0d230 1// SPDX-License-Identifier: GPL-2.0-or-later
24dcb3d9
DH
2/* Filesystem access-by-fd.
3 *
4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
24dcb3d9
DH
6 */
7
8#include <linux/fs_context.h>
ecdab150 9#include <linux/fs_parser.h>
24dcb3d9
DH
10#include <linux/slab.h>
11#include <linux/uaccess.h>
12#include <linux/syscalls.h>
13#include <linux/security.h>
14#include <linux/anon_inodes.h>
15#include <linux/namei.h>
16#include <linux/file.h>
17#include <uapi/linux/mount.h>
ecdab150 18#include "internal.h"
24dcb3d9
DH
19#include "mount.h"
20
007ec26c
DH
21/*
22 * Allow the user to read back any error, warning or informational messages.
23 */
24static ssize_t fscontext_read(struct file *file,
25 char __user *_buf, size_t len, loff_t *pos)
26{
27 struct fs_context *fc = file->private_data;
cc3c0b53 28 struct fc_log *log = fc->log.log;
007ec26c
DH
29 unsigned int logsize = ARRAY_SIZE(log->buffer);
30 ssize_t ret;
31 char *p;
32 bool need_free;
33 int index, n;
34
35 ret = mutex_lock_interruptible(&fc->uapi_mutex);
36 if (ret < 0)
37 return ret;
38
39 if (log->head == log->tail) {
40 mutex_unlock(&fc->uapi_mutex);
41 return -ENODATA;
42 }
43
44 index = log->tail & (logsize - 1);
45 p = log->buffer[index];
46 need_free = log->need_free & (1 << index);
47 log->buffer[index] = NULL;
48 log->need_free &= ~(1 << index);
49 log->tail++;
50 mutex_unlock(&fc->uapi_mutex);
51
52 ret = -EMSGSIZE;
53 n = strlen(p);
54 if (n > len)
55 goto err_free;
56 ret = -EFAULT;
57 if (copy_to_user(_buf, p, n) != 0)
58 goto err_free;
59 ret = n;
60
61err_free:
62 if (need_free)
63 kfree(p);
64 return ret;
65}
66
24dcb3d9
DH
67static int fscontext_release(struct inode *inode, struct file *file)
68{
69 struct fs_context *fc = file->private_data;
70
71 if (fc) {
72 file->private_data = NULL;
73 put_fs_context(fc);
74 }
75 return 0;
76}
77
78const struct file_operations fscontext_fops = {
007ec26c 79 .read = fscontext_read,
24dcb3d9
DH
80 .release = fscontext_release,
81 .llseek = no_llseek,
82};
83
84/*
85 * Attach a filesystem context to a file and an fd.
86 */
87static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
88{
89 int fd;
90
1cdc415f 91 fd = anon_inode_getfd("[fscontext]", &fscontext_fops, fc,
24dcb3d9
DH
92 O_RDWR | o_flags);
93 if (fd < 0)
94 put_fs_context(fc);
95 return fd;
96}
97
007ec26c
DH
98static int fscontext_alloc_log(struct fs_context *fc)
99{
cc3c0b53
AV
100 fc->log.log = kzalloc(sizeof(*fc->log.log), GFP_KERNEL);
101 if (!fc->log.log)
007ec26c 102 return -ENOMEM;
cc3c0b53
AV
103 refcount_set(&fc->log.log->usage, 1);
104 fc->log.log->owner = fc->fs_type->owner;
007ec26c
DH
105 return 0;
106}
107
24dcb3d9
DH
108/*
109 * Open a filesystem by name so that it can be configured for mounting.
110 *
111 * We are allowed to specify a container in which the filesystem will be
112 * opened, thereby indicating which namespaces will be used (notably, which
113 * network namespace will be used for network filesystems).
114 */
115SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
116{
117 struct file_system_type *fs_type;
118 struct fs_context *fc;
119 const char *fs_name;
007ec26c 120 int ret;
24dcb3d9 121
a5f85d78 122 if (!may_mount())
24dcb3d9
DH
123 return -EPERM;
124
125 if (flags & ~FSOPEN_CLOEXEC)
126 return -EINVAL;
127
128 fs_name = strndup_user(_fs_name, PAGE_SIZE);
129 if (IS_ERR(fs_name))
130 return PTR_ERR(fs_name);
131
132 fs_type = get_fs_type(fs_name);
133 kfree(fs_name);
134 if (!fs_type)
135 return -ENODEV;
136
137 fc = fs_context_for_mount(fs_type, 0);
138 put_filesystem(fs_type);
139 if (IS_ERR(fc))
140 return PTR_ERR(fc);
141
142 fc->phase = FS_CONTEXT_CREATE_PARAMS;
007ec26c
DH
143
144 ret = fscontext_alloc_log(fc);
145 if (ret < 0)
146 goto err_fc;
147
24dcb3d9 148 return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
007ec26c
DH
149
150err_fc:
151 put_fs_context(fc);
152 return ret;
24dcb3d9 153}
ecdab150 154
cf3cba4a
DH
155/*
156 * Pick a superblock into a context for reconfiguration.
157 */
158SYSCALL_DEFINE3(fspick, int, dfd, const char __user *, path, unsigned int, flags)
159{
160 struct fs_context *fc;
161 struct path target;
162 unsigned int lookup_flags;
163 int ret;
164
a5f85d78 165 if (!may_mount())
cf3cba4a
DH
166 return -EPERM;
167
168 if ((flags & ~(FSPICK_CLOEXEC |
169 FSPICK_SYMLINK_NOFOLLOW |
170 FSPICK_NO_AUTOMOUNT |
171 FSPICK_EMPTY_PATH)) != 0)
172 return -EINVAL;
173
174 lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
175 if (flags & FSPICK_SYMLINK_NOFOLLOW)
176 lookup_flags &= ~LOOKUP_FOLLOW;
177 if (flags & FSPICK_NO_AUTOMOUNT)
178 lookup_flags &= ~LOOKUP_AUTOMOUNT;
179 if (flags & FSPICK_EMPTY_PATH)
180 lookup_flags |= LOOKUP_EMPTY;
181 ret = user_path_at(dfd, path, lookup_flags, &target);
182 if (ret < 0)
183 goto err;
184
185 ret = -EINVAL;
186 if (target.mnt->mnt_root != target.dentry)
187 goto err_path;
188
189 fc = fs_context_for_reconfigure(target.dentry, 0, 0);
190 if (IS_ERR(fc)) {
191 ret = PTR_ERR(fc);
192 goto err_path;
193 }
194
195 fc->phase = FS_CONTEXT_RECONF_PARAMS;
196
197 ret = fscontext_alloc_log(fc);
198 if (ret < 0)
199 goto err_fc;
200
201 path_put(&target);
202 return fscontext_create_fd(fc, flags & FSPICK_CLOEXEC ? O_CLOEXEC : 0);
203
204err_fc:
205 put_fs_context(fc);
206err_path:
207 path_put(&target);
208err:
209 return ret;
210}
211
22ed7ecd 212static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
dae8b08d
CB
213{
214 struct super_block *sb;
215 int ret;
216
217 if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
218 return -EBUSY;
219
220 if (!mount_capable(fc))
221 return -EPERM;
222
22ed7ecd
CB
223 /* require the new mount api */
224 if (exclusive && fc->ops == &legacy_fs_context_ops)
225 return -EOPNOTSUPP;
226
dae8b08d 227 fc->phase = FS_CONTEXT_CREATING;
22ed7ecd 228 fc->exclusive = exclusive;
dae8b08d
CB
229
230 ret = vfs_get_tree(fc);
231 if (ret) {
232 fc->phase = FS_CONTEXT_FAILED;
233 return ret;
234 }
235
236 sb = fc->root->d_sb;
237 ret = security_sb_kern_mount(sb);
238 if (unlikely(ret)) {
239 fc_drop_locked(fc);
240 fc->phase = FS_CONTEXT_FAILED;
241 return ret;
242 }
243
244 /* vfs_get_tree() callchains will have grabbed @s_umount */
245 up_write(&sb->s_umount);
246 fc->phase = FS_CONTEXT_AWAITING_MOUNT;
247 return 0;
248}
249
11a51d8c
CB
250static int vfs_cmd_reconfigure(struct fs_context *fc)
251{
252 struct super_block *sb;
253 int ret;
254
255 if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
256 return -EBUSY;
257
258 fc->phase = FS_CONTEXT_RECONFIGURING;
259
260 sb = fc->root->d_sb;
261 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
262 fc->phase = FS_CONTEXT_FAILED;
263 return -EPERM;
264 }
265
266 down_write(&sb->s_umount);
267 ret = reconfigure_super(fc);
268 up_write(&sb->s_umount);
269 if (ret) {
270 fc->phase = FS_CONTEXT_FAILED;
271 return ret;
272 }
273
274 vfs_clean_context(fc);
275 return 0;
276}
277
ecdab150
DH
278/*
279 * Check the state and apply the configuration. Note that this function is
280 * allowed to 'steal' the value by setting param->xxx to NULL before returning.
281 */
282static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
283 struct fs_parameter *param)
284{
ecdab150
DH
285 int ret;
286
287 ret = finish_clean_context(fc);
288 if (ret)
289 return ret;
290 switch (cmd) {
291 case FSCONFIG_CMD_CREATE:
22ed7ecd
CB
292 return vfs_cmd_create(fc, false);
293 case FSCONFIG_CMD_CREATE_EXCL:
294 return vfs_cmd_create(fc, true);
ecdab150 295 case FSCONFIG_CMD_RECONFIGURE:
11a51d8c 296 return vfs_cmd_reconfigure(fc);
ecdab150
DH
297 default:
298 if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
299 fc->phase != FS_CONTEXT_RECONF_PARAMS)
300 return -EBUSY;
301
302 return vfs_parse_fs_param(fc, param);
303 }
ecdab150
DH
304}
305
306/**
307 * sys_fsconfig - Set parameters and trigger actions on a context
308 * @fd: The filesystem context to act upon
309 * @cmd: The action to take
310 * @_key: Where appropriate, the parameter key to set
311 * @_value: Where appropriate, the parameter value to set
312 * @aux: Additional information for the value
313 *
314 * This system call is used to set parameters on a context, including
315 * superblock settings, data source and security labelling.
316 *
317 * Actions include triggering the creation of a superblock and the
318 * reconfiguration of the superblock attached to the specified context.
319 *
320 * When setting a parameter, @cmd indicates the type of value being proposed
321 * and @_key indicates the parameter to be altered.
322 *
323 * @_value and @aux are used to specify the value, should a value be required:
324 *
325 * (*) fsconfig_set_flag: No value is specified. The parameter must be boolean
326 * in nature. The key may be prefixed with "no" to invert the
327 * setting. @_value must be NULL and @aux must be 0.
328 *
329 * (*) fsconfig_set_string: A string value is specified. The parameter can be
330 * expecting boolean, integer, string or take a path. A conversion to an
331 * appropriate type will be attempted (which may include looking up as a
332 * path). @_value points to a NUL-terminated string and @aux must be 0.
333 *
334 * (*) fsconfig_set_binary: A binary blob is specified. @_value points to the
335 * blob and @aux indicates its size. The parameter must be expecting a
336 * blob.
337 *
338 * (*) fsconfig_set_path: A non-empty path is specified. The parameter must be
339 * expecting a path object. @_value points to a NUL-terminated string that
340 * is the path and @aux is a file descriptor at which to start a relative
341 * lookup or AT_FDCWD.
342 *
343 * (*) fsconfig_set_path_empty: As fsconfig_set_path, but with AT_EMPTY_PATH
344 * implied.
345 *
346 * (*) fsconfig_set_fd: An open file descriptor is specified. @_value must be
347 * NULL and @aux indicates the file descriptor.
348 */
349SYSCALL_DEFINE5(fsconfig,
350 int, fd,
351 unsigned int, cmd,
352 const char __user *, _key,
353 const void __user *, _value,
354 int, aux)
355{
356 struct fs_context *fc;
357 struct fd f;
358 int ret;
aa1918f9 359 int lookup_flags = 0;
ecdab150
DH
360
361 struct fs_parameter param = {
362 .type = fs_value_is_undefined,
363 };
364
365 if (fd < 0)
366 return -EINVAL;
367
368 switch (cmd) {
369 case FSCONFIG_SET_FLAG:
370 if (!_key || _value || aux)
371 return -EINVAL;
372 break;
373 case FSCONFIG_SET_STRING:
374 if (!_key || !_value || aux)
375 return -EINVAL;
376 break;
377 case FSCONFIG_SET_BINARY:
378 if (!_key || !_value || aux <= 0 || aux > 1024 * 1024)
379 return -EINVAL;
380 break;
381 case FSCONFIG_SET_PATH:
382 case FSCONFIG_SET_PATH_EMPTY:
383 if (!_key || !_value || (aux != AT_FDCWD && aux < 0))
384 return -EINVAL;
385 break;
386 case FSCONFIG_SET_FD:
387 if (!_key || _value || aux < 0)
388 return -EINVAL;
389 break;
390 case FSCONFIG_CMD_CREATE:
22ed7ecd 391 case FSCONFIG_CMD_CREATE_EXCL:
ecdab150
DH
392 case FSCONFIG_CMD_RECONFIGURE:
393 if (_key || _value || aux)
394 return -EINVAL;
395 break;
396 default:
397 return -EOPNOTSUPP;
398 }
399
400 f = fdget(fd);
401 if (!f.file)
402 return -EBADF;
403 ret = -EINVAL;
404 if (f.file->f_op != &fscontext_fops)
405 goto out_f;
406
407 fc = f.file->private_data;
408 if (fc->ops == &legacy_fs_context_ops) {
409 switch (cmd) {
410 case FSCONFIG_SET_BINARY:
411 case FSCONFIG_SET_PATH:
412 case FSCONFIG_SET_PATH_EMPTY:
413 case FSCONFIG_SET_FD:
414 ret = -EOPNOTSUPP;
415 goto out_f;
416 }
417 }
418
419 if (_key) {
420 param.key = strndup_user(_key, 256);
421 if (IS_ERR(param.key)) {
422 ret = PTR_ERR(param.key);
423 goto out_f;
424 }
425 }
426
427 switch (cmd) {
428 case FSCONFIG_SET_FLAG:
429 param.type = fs_value_is_flag;
430 break;
431 case FSCONFIG_SET_STRING:
432 param.type = fs_value_is_string;
433 param.string = strndup_user(_value, 256);
434 if (IS_ERR(param.string)) {
435 ret = PTR_ERR(param.string);
436 goto out_key;
437 }
438 param.size = strlen(param.string);
439 break;
440 case FSCONFIG_SET_BINARY:
441 param.type = fs_value_is_blob;
442 param.size = aux;
443 param.blob = memdup_user_nul(_value, aux);
444 if (IS_ERR(param.blob)) {
445 ret = PTR_ERR(param.blob);
446 goto out_key;
447 }
448 break;
aa1918f9
AV
449 case FSCONFIG_SET_PATH_EMPTY:
450 lookup_flags = LOOKUP_EMPTY;
df561f66 451 fallthrough;
ecdab150
DH
452 case FSCONFIG_SET_PATH:
453 param.type = fs_value_is_filename;
aa1918f9 454 param.name = getname_flags(_value, lookup_flags, NULL);
ecdab150
DH
455 if (IS_ERR(param.name)) {
456 ret = PTR_ERR(param.name);
457 goto out_key;
458 }
459 param.dirfd = aux;
460 param.size = strlen(param.name->name);
461 break;
462 case FSCONFIG_SET_FD:
463 param.type = fs_value_is_file;
464 ret = -EBADF;
465 param.file = fget(aux);
466 if (!param.file)
467 goto out_key;
9cf16b38 468 param.dirfd = aux;
ecdab150
DH
469 break;
470 default:
471 break;
472 }
473
474 ret = mutex_lock_interruptible(&fc->uapi_mutex);
475 if (ret == 0) {
476 ret = vfs_fsconfig_locked(fc, cmd, &param);
477 mutex_unlock(&fc->uapi_mutex);
478 }
479
480 /* Clean up the our record of any value that we obtained from
481 * userspace. Note that the value may have been stolen by the LSM or
482 * filesystem, in which case the value pointer will have been cleared.
483 */
484 switch (cmd) {
485 case FSCONFIG_SET_STRING:
486 case FSCONFIG_SET_BINARY:
487 kfree(param.string);
488 break;
489 case FSCONFIG_SET_PATH:
490 case FSCONFIG_SET_PATH_EMPTY:
491 if (param.name)
492 putname(param.name);
493 break;
494 case FSCONFIG_SET_FD:
495 if (param.file)
496 fput(param.file);
497 break;
498 default:
499 break;
500 }
501out_key:
502 kfree(param.key);
503out_f:
504 fdput(f);
505 return ret;
506}