vfs: syscall: Add fsmount() to create a mount for a superblock
[linux-block.git] / fs / fsopen.c
CommitLineData
24dcb3d9
DH
1/* Filesystem access-by-fd.
2 *
3 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 */
11
12#include <linux/fs_context.h>
ecdab150 13#include <linux/fs_parser.h>
24dcb3d9
DH
14#include <linux/slab.h>
15#include <linux/uaccess.h>
16#include <linux/syscalls.h>
17#include <linux/security.h>
18#include <linux/anon_inodes.h>
19#include <linux/namei.h>
20#include <linux/file.h>
21#include <uapi/linux/mount.h>
ecdab150 22#include "internal.h"
24dcb3d9
DH
23#include "mount.h"
24
007ec26c
DH
25/*
26 * Allow the user to read back any error, warning or informational messages.
27 */
28static ssize_t fscontext_read(struct file *file,
29 char __user *_buf, size_t len, loff_t *pos)
30{
31 struct fs_context *fc = file->private_data;
32 struct fc_log *log = fc->log;
33 unsigned int logsize = ARRAY_SIZE(log->buffer);
34 ssize_t ret;
35 char *p;
36 bool need_free;
37 int index, n;
38
39 ret = mutex_lock_interruptible(&fc->uapi_mutex);
40 if (ret < 0)
41 return ret;
42
43 if (log->head == log->tail) {
44 mutex_unlock(&fc->uapi_mutex);
45 return -ENODATA;
46 }
47
48 index = log->tail & (logsize - 1);
49 p = log->buffer[index];
50 need_free = log->need_free & (1 << index);
51 log->buffer[index] = NULL;
52 log->need_free &= ~(1 << index);
53 log->tail++;
54 mutex_unlock(&fc->uapi_mutex);
55
56 ret = -EMSGSIZE;
57 n = strlen(p);
58 if (n > len)
59 goto err_free;
60 ret = -EFAULT;
61 if (copy_to_user(_buf, p, n) != 0)
62 goto err_free;
63 ret = n;
64
65err_free:
66 if (need_free)
67 kfree(p);
68 return ret;
69}
70
24dcb3d9
DH
71static int fscontext_release(struct inode *inode, struct file *file)
72{
73 struct fs_context *fc = file->private_data;
74
75 if (fc) {
76 file->private_data = NULL;
77 put_fs_context(fc);
78 }
79 return 0;
80}
81
82const struct file_operations fscontext_fops = {
007ec26c 83 .read = fscontext_read,
24dcb3d9
DH
84 .release = fscontext_release,
85 .llseek = no_llseek,
86};
87
88/*
89 * Attach a filesystem context to a file and an fd.
90 */
91static int fscontext_create_fd(struct fs_context *fc, unsigned int o_flags)
92{
93 int fd;
94
95 fd = anon_inode_getfd("fscontext", &fscontext_fops, fc,
96 O_RDWR | o_flags);
97 if (fd < 0)
98 put_fs_context(fc);
99 return fd;
100}
101
007ec26c
DH
102static int fscontext_alloc_log(struct fs_context *fc)
103{
104 fc->log = kzalloc(sizeof(*fc->log), GFP_KERNEL);
105 if (!fc->log)
106 return -ENOMEM;
107 refcount_set(&fc->log->usage, 1);
108 fc->log->owner = fc->fs_type->owner;
109 return 0;
110}
111
24dcb3d9
DH
112/*
113 * Open a filesystem by name so that it can be configured for mounting.
114 *
115 * We are allowed to specify a container in which the filesystem will be
116 * opened, thereby indicating which namespaces will be used (notably, which
117 * network namespace will be used for network filesystems).
118 */
119SYSCALL_DEFINE2(fsopen, const char __user *, _fs_name, unsigned int, flags)
120{
121 struct file_system_type *fs_type;
122 struct fs_context *fc;
123 const char *fs_name;
007ec26c 124 int ret;
24dcb3d9
DH
125
126 if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN))
127 return -EPERM;
128
129 if (flags & ~FSOPEN_CLOEXEC)
130 return -EINVAL;
131
132 fs_name = strndup_user(_fs_name, PAGE_SIZE);
133 if (IS_ERR(fs_name))
134 return PTR_ERR(fs_name);
135
136 fs_type = get_fs_type(fs_name);
137 kfree(fs_name);
138 if (!fs_type)
139 return -ENODEV;
140
141 fc = fs_context_for_mount(fs_type, 0);
142 put_filesystem(fs_type);
143 if (IS_ERR(fc))
144 return PTR_ERR(fc);
145
146 fc->phase = FS_CONTEXT_CREATE_PARAMS;
007ec26c
DH
147
148 ret = fscontext_alloc_log(fc);
149 if (ret < 0)
150 goto err_fc;
151
24dcb3d9 152 return fscontext_create_fd(fc, flags & FSOPEN_CLOEXEC ? O_CLOEXEC : 0);
007ec26c
DH
153
154err_fc:
155 put_fs_context(fc);
156 return ret;
24dcb3d9 157}
ecdab150
DH
158
159/*
160 * Check the state and apply the configuration. Note that this function is
161 * allowed to 'steal' the value by setting param->xxx to NULL before returning.
162 */
163static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
164 struct fs_parameter *param)
165{
166 struct super_block *sb;
167 int ret;
168
169 ret = finish_clean_context(fc);
170 if (ret)
171 return ret;
172 switch (cmd) {
173 case FSCONFIG_CMD_CREATE:
174 if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
175 return -EBUSY;
176 fc->phase = FS_CONTEXT_CREATING;
177 ret = vfs_get_tree(fc);
178 if (ret)
179 break;
180 sb = fc->root->d_sb;
181 ret = security_sb_kern_mount(sb);
182 if (unlikely(ret)) {
183 fc_drop_locked(fc);
184 break;
185 }
186 up_write(&sb->s_umount);
187 fc->phase = FS_CONTEXT_AWAITING_MOUNT;
188 return 0;
189 case FSCONFIG_CMD_RECONFIGURE:
190 if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
191 return -EBUSY;
192 fc->phase = FS_CONTEXT_RECONFIGURING;
193 sb = fc->root->d_sb;
194 if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
195 ret = -EPERM;
196 break;
197 }
198 down_write(&sb->s_umount);
199 ret = reconfigure_super(fc);
200 up_write(&sb->s_umount);
201 if (ret)
202 break;
203 vfs_clean_context(fc);
204 return 0;
205 default:
206 if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
207 fc->phase != FS_CONTEXT_RECONF_PARAMS)
208 return -EBUSY;
209
210 return vfs_parse_fs_param(fc, param);
211 }
212 fc->phase = FS_CONTEXT_FAILED;
213 return ret;
214}
215
216/**
217 * sys_fsconfig - Set parameters and trigger actions on a context
218 * @fd: The filesystem context to act upon
219 * @cmd: The action to take
220 * @_key: Where appropriate, the parameter key to set
221 * @_value: Where appropriate, the parameter value to set
222 * @aux: Additional information for the value
223 *
224 * This system call is used to set parameters on a context, including
225 * superblock settings, data source and security labelling.
226 *
227 * Actions include triggering the creation of a superblock and the
228 * reconfiguration of the superblock attached to the specified context.
229 *
230 * When setting a parameter, @cmd indicates the type of value being proposed
231 * and @_key indicates the parameter to be altered.
232 *
233 * @_value and @aux are used to specify the value, should a value be required:
234 *
235 * (*) fsconfig_set_flag: No value is specified. The parameter must be boolean
236 * in nature. The key may be prefixed with "no" to invert the
237 * setting. @_value must be NULL and @aux must be 0.
238 *
239 * (*) fsconfig_set_string: A string value is specified. The parameter can be
240 * expecting boolean, integer, string or take a path. A conversion to an
241 * appropriate type will be attempted (which may include looking up as a
242 * path). @_value points to a NUL-terminated string and @aux must be 0.
243 *
244 * (*) fsconfig_set_binary: A binary blob is specified. @_value points to the
245 * blob and @aux indicates its size. The parameter must be expecting a
246 * blob.
247 *
248 * (*) fsconfig_set_path: A non-empty path is specified. The parameter must be
249 * expecting a path object. @_value points to a NUL-terminated string that
250 * is the path and @aux is a file descriptor at which to start a relative
251 * lookup or AT_FDCWD.
252 *
253 * (*) fsconfig_set_path_empty: As fsconfig_set_path, but with AT_EMPTY_PATH
254 * implied.
255 *
256 * (*) fsconfig_set_fd: An open file descriptor is specified. @_value must be
257 * NULL and @aux indicates the file descriptor.
258 */
259SYSCALL_DEFINE5(fsconfig,
260 int, fd,
261 unsigned int, cmd,
262 const char __user *, _key,
263 const void __user *, _value,
264 int, aux)
265{
266 struct fs_context *fc;
267 struct fd f;
268 int ret;
269
270 struct fs_parameter param = {
271 .type = fs_value_is_undefined,
272 };
273
274 if (fd < 0)
275 return -EINVAL;
276
277 switch (cmd) {
278 case FSCONFIG_SET_FLAG:
279 if (!_key || _value || aux)
280 return -EINVAL;
281 break;
282 case FSCONFIG_SET_STRING:
283 if (!_key || !_value || aux)
284 return -EINVAL;
285 break;
286 case FSCONFIG_SET_BINARY:
287 if (!_key || !_value || aux <= 0 || aux > 1024 * 1024)
288 return -EINVAL;
289 break;
290 case FSCONFIG_SET_PATH:
291 case FSCONFIG_SET_PATH_EMPTY:
292 if (!_key || !_value || (aux != AT_FDCWD && aux < 0))
293 return -EINVAL;
294 break;
295 case FSCONFIG_SET_FD:
296 if (!_key || _value || aux < 0)
297 return -EINVAL;
298 break;
299 case FSCONFIG_CMD_CREATE:
300 case FSCONFIG_CMD_RECONFIGURE:
301 if (_key || _value || aux)
302 return -EINVAL;
303 break;
304 default:
305 return -EOPNOTSUPP;
306 }
307
308 f = fdget(fd);
309 if (!f.file)
310 return -EBADF;
311 ret = -EINVAL;
312 if (f.file->f_op != &fscontext_fops)
313 goto out_f;
314
315 fc = f.file->private_data;
316 if (fc->ops == &legacy_fs_context_ops) {
317 switch (cmd) {
318 case FSCONFIG_SET_BINARY:
319 case FSCONFIG_SET_PATH:
320 case FSCONFIG_SET_PATH_EMPTY:
321 case FSCONFIG_SET_FD:
322 ret = -EOPNOTSUPP;
323 goto out_f;
324 }
325 }
326
327 if (_key) {
328 param.key = strndup_user(_key, 256);
329 if (IS_ERR(param.key)) {
330 ret = PTR_ERR(param.key);
331 goto out_f;
332 }
333 }
334
335 switch (cmd) {
336 case FSCONFIG_SET_FLAG:
337 param.type = fs_value_is_flag;
338 break;
339 case FSCONFIG_SET_STRING:
340 param.type = fs_value_is_string;
341 param.string = strndup_user(_value, 256);
342 if (IS_ERR(param.string)) {
343 ret = PTR_ERR(param.string);
344 goto out_key;
345 }
346 param.size = strlen(param.string);
347 break;
348 case FSCONFIG_SET_BINARY:
349 param.type = fs_value_is_blob;
350 param.size = aux;
351 param.blob = memdup_user_nul(_value, aux);
352 if (IS_ERR(param.blob)) {
353 ret = PTR_ERR(param.blob);
354 goto out_key;
355 }
356 break;
357 case FSCONFIG_SET_PATH:
358 param.type = fs_value_is_filename;
359 param.name = getname_flags(_value, 0, NULL);
360 if (IS_ERR(param.name)) {
361 ret = PTR_ERR(param.name);
362 goto out_key;
363 }
364 param.dirfd = aux;
365 param.size = strlen(param.name->name);
366 break;
367 case FSCONFIG_SET_PATH_EMPTY:
368 param.type = fs_value_is_filename_empty;
369 param.name = getname_flags(_value, LOOKUP_EMPTY, NULL);
370 if (IS_ERR(param.name)) {
371 ret = PTR_ERR(param.name);
372 goto out_key;
373 }
374 param.dirfd = aux;
375 param.size = strlen(param.name->name);
376 break;
377 case FSCONFIG_SET_FD:
378 param.type = fs_value_is_file;
379 ret = -EBADF;
380 param.file = fget(aux);
381 if (!param.file)
382 goto out_key;
383 break;
384 default:
385 break;
386 }
387
388 ret = mutex_lock_interruptible(&fc->uapi_mutex);
389 if (ret == 0) {
390 ret = vfs_fsconfig_locked(fc, cmd, &param);
391 mutex_unlock(&fc->uapi_mutex);
392 }
393
394 /* Clean up the our record of any value that we obtained from
395 * userspace. Note that the value may have been stolen by the LSM or
396 * filesystem, in which case the value pointer will have been cleared.
397 */
398 switch (cmd) {
399 case FSCONFIG_SET_STRING:
400 case FSCONFIG_SET_BINARY:
401 kfree(param.string);
402 break;
403 case FSCONFIG_SET_PATH:
404 case FSCONFIG_SET_PATH_EMPTY:
405 if (param.name)
406 putname(param.name);
407 break;
408 case FSCONFIG_SET_FD:
409 if (param.file)
410 fput(param.file);
411 break;
412 default:
413 break;
414 }
415out_key:
416 kfree(param.key);
417out_f:
418 fdput(f);
419 return ret;
420}