Merge branch 'clk-qcom-sdm845' into clk-next
[linux-2.6-block.git] / fs / exportfs / expfs.c
CommitLineData
e38f9817
CH
1/*
2 * Copyright (C) Neil Brown 2002
3 * Copyright (C) Christoph Hellwig 2007
4 *
5 * This file contains the code mapping from inodes to NFS file handles,
6 * and for mapping back from file handles to dentries.
7 *
8 * For details on why we do all the strange and hairy things in here
dc7a0816 9 * take a look at Documentation/filesystems/nfs/Exporting.
e38f9817 10 */
a5694255 11#include <linux/exportfs.h>
1da177e4
LT
12#include <linux/fs.h>
13#include <linux/file.h>
14#include <linux/module.h>
d37065cd 15#include <linux/mount.h>
1da177e4 16#include <linux/namei.h>
745ca247 17#include <linux/sched.h>
5b825c3a 18#include <linux/cred.h>
1da177e4 19
/*
 * Debug tracing is compiled out: dprintk() expands to an empty statement,
 * so its arguments are never evaluated.
 */
#define dprintk(fmt, args...) do { } while (0)

/* Generic readdir-based name lookup; defined further down in this file. */
static int get_name(const struct path *path, char *name, struct dentry *child);
10f11c34
CH
24
25
e38f9817
CH
26static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
27 char *name, struct dentry *child)
10f11c34 28{
39655164 29 const struct export_operations *nop = dir->d_sb->s_export_op;
765927b2 30 struct path path = {.mnt = mnt, .dentry = dir};
10f11c34
CH
31
32 if (nop->get_name)
33 return nop->get_name(dir, name, child);
34 else
765927b2 35 return get_name(&path, name, child);
10f11c34 36}
1da177e4 37
fb66a198
CH
38/*
39 * Check if the dentry or any of it's aliases is acceptable.
40 */
e2f99018
CH
41static struct dentry *
42find_acceptable_alias(struct dentry *result,
43 int (*acceptable)(void *context, struct dentry *dentry),
44 void *context)
45{
46 struct dentry *dentry, *toput = NULL;
873feea0 47 struct inode *inode;
e2f99018 48
fb66a198
CH
49 if (acceptable(context, result))
50 return result;
51
873feea0
NP
52 inode = result->d_inode;
53 spin_lock(&inode->i_lock);
946e51f2 54 hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
dc0474be 55 dget(dentry);
873feea0 56 spin_unlock(&inode->i_lock);
e2f99018
CH
57 if (toput)
58 dput(toput);
59 if (dentry != result && acceptable(context, dentry)) {
60 dput(result);
61 return dentry;
62 }
873feea0 63 spin_lock(&inode->i_lock);
e2f99018
CH
64 toput = dentry;
65 }
873feea0 66 spin_unlock(&inode->i_lock);
e2f99018
CH
67
68 if (toput)
69 dput(toput);
70 return NULL;
71}
72
a056cc89
BF
73static bool dentry_connected(struct dentry *dentry)
74{
75 dget(dentry);
76 while (dentry->d_flags & DCACHE_DISCONNECTED) {
77 struct dentry *parent = dget_parent(dentry);
78
79 dput(dentry);
80 if (IS_ROOT(dentry)) {
81 dput(parent);
82 return false;
83 }
84 dentry = parent;
85 }
86 dput(dentry);
87 return true;
88}
89
0dbc018a
BF
90static void clear_disconnected(struct dentry *dentry)
91{
92 dget(dentry);
93 while (dentry->d_flags & DCACHE_DISCONNECTED) {
94 struct dentry *parent = dget_parent(dentry);
95
96 WARN_ON_ONCE(IS_ROOT(dentry));
97
98 spin_lock(&dentry->d_lock);
99 dentry->d_flags &= ~DCACHE_DISCONNECTED;
100 spin_unlock(&dentry->d_lock);
101
102 dput(dentry);
103 dentry = parent;
104 }
105 dput(dentry);
106}
107
bbf7a8a3
BF
108/*
109 * Reconnect a directory dentry with its parent.
110 *
111 * This can return a dentry, or NULL, or an error.
112 *
113 * In the first case the returned dentry is the parent of the given
114 * dentry, and may itself need to be reconnected to its parent.
115 *
116 * In the NULL case, a concurrent VFS operation has either renamed or
117 * removed this directory. The concurrent operation has reconnected our
118 * dentry, so we no longer need to.
119 */
120static struct dentry *reconnect_one(struct vfsmount *mnt,
121 struct dentry *dentry, char *nbuf)
122{
123 struct dentry *parent;
124 struct dentry *tmp;
125 int err;
126
127 parent = ERR_PTR(-EACCES);
5955102c 128 inode_lock(dentry->d_inode);
bbf7a8a3
BF
129 if (mnt->mnt_sb->s_export_op->get_parent)
130 parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
5955102c 131 inode_unlock(dentry->d_inode);
bbf7a8a3
BF
132
133 if (IS_ERR(parent)) {
134 dprintk("%s: get_parent of %ld failed, err %d\n",
135 __func__, dentry->d_inode->i_ino, PTR_ERR(parent));
136 return parent;
137 }
138
139 dprintk("%s: find name of %lu in %lu\n", __func__,
140 dentry->d_inode->i_ino, parent->d_inode->i_ino);
141 err = exportfs_get_name(mnt, parent, nbuf, dentry);
142 if (err == -ENOENT)
143 goto out_reconnected;
144 if (err)
145 goto out_err;
146 dprintk("%s: found name: %s\n", __func__, nbuf);
383d4e8a 147 tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf));
bbf7a8a3
BF
148 if (IS_ERR(tmp)) {
149 dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
150 goto out_err;
151 }
152 if (tmp != dentry) {
383d4e8a
AV
153 /*
154 * Somebody has renamed it since exportfs_get_name();
155 * great, since it could've only been renamed if it
156 * got looked up and thus connected, and it would
157 * remain connected afterwards. We are done.
158 */
bbf7a8a3
BF
159 dput(tmp);
160 goto out_reconnected;
161 }
162 dput(tmp);
163 if (IS_ROOT(dentry)) {
164 err = -ESTALE;
165 goto out_err;
166 }
167 return parent;
168
169out_err:
170 dput(parent);
171 return ERR_PTR(err);
172out_reconnected:
173 dput(parent);
174 /*
175 * Someone must have renamed our entry into another parent, in
176 * which case it has been reconnected by the rename.
177 *
178 * Or someone removed it entirely, in which case filehandle
179 * lookup will succeed but the directory is now IS_DEAD and
180 * subsequent operations on it will fail.
181 *
182 * Alternatively, maybe there was no race at all, and the
183 * filesystem is just corrupt and gave us a parent that doesn't
184 * actually contain any entry pointing to this inode. So,
185 * double check that this worked and return -ESTALE if not:
186 */
187 if (!dentry_connected(dentry))
188 return ERR_PTR(-ESTALE);
189 return NULL;
190}
191
019ab801
CH
192/*
193 * Make sure target_dir is fully connected to the dentry tree.
1da177e4 194 *
78cee9a8
BF
195 * On successful return, DCACHE_DISCONNECTED will be cleared on
196 * target_dir, and target_dir->d_parent->...->d_parent will reach the
197 * root of the filesystem.
198 *
199 * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
200 * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
201 * set but already be connected. In that case we'll verify the
202 * connection to root and then clear the flag.
203 *
204 * Note that target_dir could be removed by a concurrent operation. In
205 * that case reconnect_path may still succeed with target_dir fully
206 * connected, but further operations using the filehandle will fail when
207 * necessary (due to S_DEAD being set on the directory).
1da177e4 208 */
019ab801 209static int
f3f8e175 210reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
1da177e4 211{
f27c9298 212 struct dentry *dentry, *parent;
1da177e4 213
f27c9298 214 dentry = dget(target_dir);
1da177e4 215
f27c9298 216 while (dentry->d_flags & DCACHE_DISCONNECTED) {
efbf201f 217 BUG_ON(dentry == mnt->mnt_sb->s_root);
854ff5ca 218
f27c9298
BF
219 if (IS_ROOT(dentry))
220 parent = reconnect_one(mnt, dentry, nbuf);
221 else
222 parent = dget_parent(dentry);
223
224 if (!parent)
0dbc018a 225 break;
efbf201f 226 dput(dentry);
f27c9298
BF
227 if (IS_ERR(parent))
228 return PTR_ERR(parent);
229 dentry = parent;
1da177e4 230 }
f27c9298 231 dput(dentry);
a056cc89 232 clear_disconnected(target_dir);
019ab801
CH
233 return 0;
234}
235
1da177e4 236struct getdents_callback {
5c0ba4e0 237 struct dir_context ctx;
1da177e4
LT
238 char *name; /* name that was found. It already points to a
239 buffer NAME_MAX+1 is size */
950ee956 240 u64 ino; /* the inum we are looking for */
1da177e4
LT
241 int found; /* inode matched? */
242 int sequence; /* sequence counter */
243};
244
245/*
246 * A rather strange filldir function to capture
247 * the name matching the specified inode number.
248 */
ac7576f4 249static int filldir_one(struct dir_context *ctx, const char *name, int len,
afefdbb2 250 loff_t pos, u64 ino, unsigned int d_type)
1da177e4 251{
ac7576f4
MS
252 struct getdents_callback *buf =
253 container_of(ctx, struct getdents_callback, ctx);
1da177e4
LT
254 int result = 0;
255
256 buf->sequence++;
dfc59e2c 257 if (buf->ino == ino && len <= NAME_MAX) {
1da177e4
LT
258 memcpy(buf->name, name, len);
259 buf->name[len] = '\0';
260 buf->found = 1;
261 result = -1;
262 }
263 return result;
264}
265
266/**
267 * get_name - default export_operations->get_name function
00f01791 268 * @path: the directory in which to find a name
1da177e4
LT
269 * @name: a pointer to a %NAME_MAX+1 char buffer to store the name
270 * @child: the dentry for the child directory.
271 *
272 * calls readdir on the parent until it finds an entry with
273 * the same inode number as the child, and returns that.
274 */
765927b2 275static int get_name(const struct path *path, char *name, struct dentry *child)
1da177e4 276{
745ca247 277 const struct cred *cred = current_cred();
765927b2 278 struct inode *dir = path->dentry->d_inode;
1da177e4
LT
279 int error;
280 struct file *file;
950ee956
BF
281 struct kstat stat;
282 struct path child_path = {
283 .mnt = path->mnt,
284 .dentry = child,
285 };
ac6614b7
AV
286 struct getdents_callback buffer = {
287 .ctx.actor = filldir_one,
288 .name = name,
ac6614b7 289 };
1da177e4
LT
290
291 error = -ENOTDIR;
292 if (!dir || !S_ISDIR(dir->i_mode))
293 goto out;
294 error = -EINVAL;
295 if (!dir->i_fop)
296 goto out;
950ee956
BF
297 /*
298 * inode->i_ino is unsigned long, kstat->ino is u64, so the
299 * former would be insufficient on 32-bit hosts when the
300 * filesystem supports 64-bit inode numbers. So we need to
301 * actually call ->getattr, not just read i_ino:
302 */
a528d35e
DH
303 error = vfs_getattr_nosec(&child_path, &stat,
304 STATX_INO, AT_STATX_SYNC_AS_STAT);
950ee956
BF
305 if (error)
306 return error;
307 buffer.ino = stat.ino;
1da177e4
LT
308 /*
309 * Open the directory ...
310 */
765927b2 311 file = dentry_open(path, O_RDONLY, cred);
1da177e4
LT
312 error = PTR_ERR(file);
313 if (IS_ERR(file))
314 goto out;
315
316 error = -EINVAL;
61922694 317 if (!file->f_op->iterate && !file->f_op->iterate_shared)
1da177e4
LT
318 goto out_close;
319
1da177e4
LT
320 buffer.sequence = 0;
321 while (1) {
322 int old_seq = buffer.sequence;
323
5c0ba4e0 324 error = iterate_dir(file, &buffer.ctx);
53c9c5c0
AV
325 if (buffer.found) {
326 error = 0;
327 break;
328 }
1da177e4
LT
329
330 if (error < 0)
331 break;
332
1da177e4
LT
333 error = -ENOENT;
334 if (old_seq == buffer.sequence)
335 break;
336 }
337
338out_close:
339 fput(file);
340out:
341 return error;
342}
343
1da177e4
LT
344/**
345 * export_encode_fh - default export_operations->encode_fh function
b0b0382b 346 * @inode: the object to encode
00f01791 347 * @fid: where to store the file handle fragment
1da177e4 348 * @max_len: maximum length to store there
b0b0382b 349 * @parent: parent directory inode, if wanted
1da177e4
LT
350 *
351 * This default encode_fh function assumes that the 32 inode number
352 * is suitable for locating an inode, and that the generation number
353 * can be used to check that it is still valid. It places them in the
354 * filehandle fragment where export_decode_fh expects to find them.
355 */
b0b0382b
AV
356static int export_encode_fh(struct inode *inode, struct fid *fid,
357 int *max_len, struct inode *parent)
1da177e4 358{
1da177e4 359 int len = *max_len;
6e91ea2b 360 int type = FILEID_INO32_GEN;
5fe0c237 361
b0b0382b 362 if (parent && (len < 4)) {
5fe0c237 363 *max_len = 4;
216b6cbd 364 return FILEID_INVALID;
5fe0c237
AK
365 } else if (len < 2) {
366 *max_len = 2;
216b6cbd 367 return FILEID_INVALID;
5fe0c237 368 }
1da177e4
LT
369
370 len = 2;
6e91ea2b
CH
371 fid->i32.ino = inode->i_ino;
372 fid->i32.gen = inode->i_generation;
b0b0382b 373 if (parent) {
6e91ea2b
CH
374 fid->i32.parent_ino = parent->i_ino;
375 fid->i32.parent_gen = parent->i_generation;
1da177e4 376 len = 4;
6e91ea2b 377 type = FILEID_INO32_GEN_PARENT;
1da177e4
LT
378 }
379 *max_len = len;
380 return type;
381}
382
711c7bf9
CG
383int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
384 int *max_len, struct inode *parent)
385{
386 const struct export_operations *nop = inode->i_sb->s_export_op;
387
388 if (nop && nop->encode_fh)
389 return nop->encode_fh(inode, fid->raw, max_len, parent);
390
391 return export_encode_fh(inode, fid, max_len, parent);
392}
393EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
394
6e91ea2b 395int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
d37065cd
CH
396 int connectable)
397{
10f11c34 398 int error;
b0b0382b
AV
399 struct dentry *p = NULL;
400 struct inode *inode = dentry->d_inode, *parent = NULL;
d37065cd 401
b0b0382b
AV
402 if (connectable && !S_ISDIR(inode->i_mode)) {
403 p = dget_parent(dentry);
404 /*
405 * note that while p might've ceased to be our parent already,
406 * it's still pinned by and still positive.
407 */
408 parent = p->d_inode;
409 }
711c7bf9
CG
410
411 error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
b0b0382b 412 dput(p);
10f11c34
CH
413
414 return error;
d37065cd
CH
415}
416EXPORT_SYMBOL_GPL(exportfs_encode_fh);
417
6e91ea2b
CH
418struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
419 int fh_len, int fileid_type,
420 int (*acceptable)(void *, struct dentry *), void *context)
d37065cd 421{
39655164 422 const struct export_operations *nop = mnt->mnt_sb->s_export_op;
2596110a 423 struct dentry *result, *alias;
f3f8e175 424 char nbuf[NAME_MAX+1];
2596110a 425 int err;
d37065cd 426
2596110a
CH
427 /*
428 * Try to get any dentry for the given file handle from the filesystem.
429 */
becfd1f3
AK
430 if (!nop || !nop->fh_to_dentry)
431 return ERR_PTR(-ESTALE);
2596110a 432 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
09bb8bff
N
433 if (PTR_ERR(result) == -ENOMEM)
434 return ERR_CAST(result);
435 if (IS_ERR_OR_NULL(result))
436 return ERR_PTR(-ESTALE);
2596110a 437
8a22efa1
AG
438 /*
439 * If no acceptance criteria was specified by caller, a disconnected
440 * dentry is also accepatable. Callers may use this mode to query if
441 * file handle is stale or to get a reference to an inode without
442 * risking the high overhead caused by directory reconnect.
443 */
444 if (!acceptable)
445 return result;
446
e36cb0b8 447 if (d_is_dir(result)) {
2596110a
CH
448 /*
449 * This request is for a directory.
450 *
451 * On the positive side there is only one dentry for each
452 * directory inode. On the negative side this implies that we
453 * to ensure our dentry is connected all the way up to the
454 * filesystem root.
455 */
456 if (result->d_flags & DCACHE_DISCONNECTED) {
f3f8e175 457 err = reconnect_path(mnt, result, nbuf);
2596110a
CH
458 if (err)
459 goto err_result;
460 }
461
462 if (!acceptable(context, result)) {
463 err = -EACCES;
464 goto err_result;
465 }
466
467 return result;
10f11c34 468 } else {
2596110a
CH
469 /*
470 * It's not a directory. Life is a little more complicated.
471 */
472 struct dentry *target_dir, *nresult;
2596110a
CH
473
474 /*
475 * See if either the dentry we just got from the filesystem
476 * or any alias for it is acceptable. This is always true
477 * if this filesystem is exported without the subtreecheck
478 * option. If the filesystem is exported with the subtree
479 * check option there's a fair chance we need to look at
480 * the parent directory in the file handle and make sure
481 * it's connected to the filesystem root.
482 */
483 alias = find_acceptable_alias(result, acceptable, context);
484 if (alias)
485 return alias;
486
487 /*
488 * Try to extract a dentry for the parent directory from the
489 * file handle. If this fails we'll have to give up.
490 */
491 err = -ESTALE;
492 if (!nop->fh_to_parent)
493 goto err_result;
494
495 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
496 fh_len, fileid_type);
a4f4d6df
BF
497 if (!target_dir)
498 goto err_result;
2596110a
CH
499 err = PTR_ERR(target_dir);
500 if (IS_ERR(target_dir))
501 goto err_result;
502
503 /*
504 * And as usual we need to make sure the parent directory is
505 * connected to the filesystem root. The VFS really doesn't
506 * like disconnected directories..
507 */
f3f8e175 508 err = reconnect_path(mnt, target_dir, nbuf);
2596110a
CH
509 if (err) {
510 dput(target_dir);
511 goto err_result;
512 }
513
514 /*
515 * Now that we've got both a well-connected parent and a
516 * dentry for the inode we're after, make sure that our
517 * inode is actually connected to the parent.
518 */
e38f9817 519 err = exportfs_get_name(mnt, target_dir, nbuf, result);
2596110a 520 if (!err) {
5955102c 521 inode_lock(target_dir->d_inode);
2596110a
CH
522 nresult = lookup_one_len(nbuf, target_dir,
523 strlen(nbuf));
5955102c 524 inode_unlock(target_dir->d_inode);
2596110a
CH
525 if (!IS_ERR(nresult)) {
526 if (nresult->d_inode) {
527 dput(result);
528 result = nresult;
529 } else
530 dput(nresult);
531 }
532 }
533
534 /*
535 * At this point we are done with the parent, but it's pinned
536 * by the child dentry anyway.
537 */
538 dput(target_dir);
539
540 /*
541 * And finally make sure the dentry is actually acceptable
542 * to NFSD.
543 */
544 alias = find_acceptable_alias(result, acceptable, context);
545 if (!alias) {
546 err = -EACCES;
547 goto err_result;
548 }
549
550 return alias;
10f11c34
CH
551 }
552
2596110a
CH
553 err_result:
554 dput(result);
09bb8bff
N
555 if (err != -ENOMEM)
556 err = -ESTALE;
2596110a 557 return ERR_PTR(err);
d37065cd
CH
558}
559EXPORT_SYMBOL_GPL(exportfs_decode_fh);
560
1da177e4 561MODULE_LICENSE("GPL");