* ceph_mdsc_build_path - build a path string to a given dentry
* @mdsc: mds client
* @dentry: dentry to which path should be built
- * @plen: returned length of string
- * @pbase: returned base inode number
+ * @path_info: output path, length, base ino+snap, and freepath ownership flag
* @for_wire: is this path going to be sent to the MDS?
*
* Build a string that represents the path to the dentry. This is mostly called
* foo/.snap/bar -> foo//bar
*/
char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
- int *plen, u64 *pbase, int for_wire)
+ struct ceph_path_info *path_info, int for_wire)
{
struct ceph_client *cl = mdsc->fsc->client;
struct dentry *cur;
return ERR_PTR(-ENAMETOOLONG);
}
- *pbase = base;
- *plen = PATH_MAX - 1 - pos;
+ /* Initialize the output structure */
+ memset(path_info, 0, sizeof(*path_info));
+
+ path_info->vino.ino = base;
+ path_info->pathlen = PATH_MAX - 1 - pos;
+ path_info->path = path + pos;
+ path_info->freepath = true;
+
+ /* Set snap from dentry if available */
+ if (d_inode(dentry))
+ path_info->vino.snap = ceph_snap(d_inode(dentry));
+ else
+ path_info->vino.snap = CEPH_NOSNAP;
+
doutc(cl, "on %p %d built %llx '%.*s'\n", dentry, d_count(dentry),
- base, *plen, path + pos);
+ base, PATH_MAX - 1 - pos, path + pos);
return path + pos;
}
static int build_dentry_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
- struct inode *dir, const char **ppath, int *ppathlen,
- u64 *pino, bool *pfreepath, bool parent_locked)
+ struct inode *dir, struct ceph_path_info *path_info,
+ bool parent_locked)
{
char *path;
dir = d_inode_rcu(dentry->d_parent);
if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
!IS_ENCRYPTED(dir)) {
- *pino = ceph_ino(dir);
+ path_info->vino.ino = ceph_ino(dir);
+ path_info->vino.snap = ceph_snap(dir);
rcu_read_unlock();
- *ppath = dentry->d_name.name;
- *ppathlen = dentry->d_name.len;
+ path_info->path = dentry->d_name.name;
+ path_info->pathlen = dentry->d_name.len;
+ path_info->freepath = false;
return 0;
}
rcu_read_unlock();
- path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
+ path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
if (IS_ERR(path))
return PTR_ERR(path);
- *ppath = path;
- *pfreepath = true;
+ /*
+ * ceph_mdsc_build_path already fills path_info, including snap handling.
+ */
return 0;
}
-static int build_inode_path(struct inode *inode,
- const char **ppath, int *ppathlen, u64 *pino,
- bool *pfreepath)
+static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct dentry *dentry;
char *path;
if (ceph_snap(inode) == CEPH_NOSNAP) {
- *pino = ceph_ino(inode);
- *ppathlen = 0;
+ path_info->vino.ino = ceph_ino(inode);
+ path_info->vino.snap = ceph_snap(inode);
+ path_info->pathlen = 0;
+ path_info->freepath = false;
return 0;
}
dentry = d_find_alias(inode);
- path = ceph_mdsc_build_path(mdsc, dentry, ppathlen, pino, 1);
+ path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1);
dput(dentry);
if (IS_ERR(path))
return PTR_ERR(path);
- *ppath = path;
- *pfreepath = true;
+ /*
+ * ceph_mdsc_build_path already fills path_info, including snap from dentry.
+ * Override with inode's snap since that's what this function is for.
+ */
+ path_info->vino.snap = ceph_snap(inode);
return 0;
}
*/
static int set_request_path_attr(struct ceph_mds_client *mdsc, struct inode *rinode,
struct dentry *rdentry, struct inode *rdiri,
- const char *rpath, u64 rino, const char **ppath,
- int *pathlen, u64 *ino, bool *freepath,
+ const char *rpath, u64 rino,
+ struct ceph_path_info *path_info,
bool parent_locked)
{
struct ceph_client *cl = mdsc->fsc->client;
int r = 0;
+ /* Initialize the output structure */
+ memset(path_info, 0, sizeof(*path_info));
+
if (rinode) {
- r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
+ r = build_inode_path(rinode, path_info);
doutc(cl, " inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
ceph_snap(rinode));
} else if (rdentry) {
- r = build_dentry_path(mdsc, rdentry, rdiri, ppath, pathlen, ino,
- freepath, parent_locked);
- doutc(cl, " dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath);
+ r = build_dentry_path(mdsc, rdentry, rdiri, path_info, parent_locked);
+ doutc(cl, " dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino,
+ path_info->pathlen, path_info->path);
} else if (rpath || rino) {
- *ino = rino;
- *ppath = rpath;
- *pathlen = rpath ? strlen(rpath) : 0;
- doutc(cl, " path %.*s\n", *pathlen, rpath);
+ path_info->vino.ino = rino;
+ path_info->vino.snap = CEPH_NOSNAP;
+ path_info->path = rpath;
+ path_info->pathlen = rpath ? strlen(rpath) : 0;
+ path_info->freepath = false;
+
+ doutc(cl, " path %.*s\n", path_info->pathlen, rpath);
}
return r;
struct ceph_client *cl = mdsc->fsc->client;
struct ceph_msg *msg;
struct ceph_mds_request_head_legacy *lhead;
- const char *path1 = NULL;
- const char *path2 = NULL;
- u64 ino1 = 0, ino2 = 0;
- int pathlen1 = 0, pathlen2 = 0;
- bool freepath1 = false, freepath2 = false;
+ struct ceph_path_info path_info1 = {0};
+ struct ceph_path_info path_info2 = {0};
struct dentry *old_dentry = NULL;
int len;
u16 releases;
u16 request_head_version = mds_supported_head_version(session);
kuid_t caller_fsuid = req->r_cred->fsuid;
kgid_t caller_fsgid = req->r_cred->fsgid;
+ bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry,
- req->r_parent, req->r_path1, req->r_ino1.ino,
- &path1, &pathlen1, &ino1, &freepath1,
- test_bit(CEPH_MDS_R_PARENT_LOCKED,
- &req->r_req_flags));
+ req->r_parent, req->r_path1, req->r_ino1.ino,
+ &path_info1, parent_locked);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out;
}
+ /*
+ * When the parent directory's i_rwsem is *not* locked, req->r_parent may
+ * have become stale (e.g. after a concurrent rename) between the time the
+ * dentry was looked up and now. If we detect that the stored r_parent
+ * does not match the inode number we just encoded for the request, switch
+ * to the correct inode so that the MDS receives a valid parent reference.
+ */
+ if (!parent_locked && req->r_parent && path_info1.vino.ino &&
+ ceph_ino(req->r_parent) != path_info1.vino.ino) {
+ struct inode *old_parent = req->r_parent;
+ struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL);
+ if (!IS_ERR(correct_dir)) {
+ WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n",
+ ceph_ino(old_parent), path_info1.vino.ino);
+ /*
+ * Transfer CEPH_CAP_PIN from the old parent to the new one.
+ * The pin was taken earlier in ceph_mdsc_submit_request().
+ */
+ ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN);
+ iput(old_parent);
+ req->r_parent = correct_dir;
+ ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
+ }
+ }
+
/* If r_old_dentry is set, then assume that its parent is locked */
if (req->r_old_dentry &&
!(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
old_dentry = req->r_old_dentry;
ret = set_request_path_attr(mdsc, NULL, old_dentry,
- req->r_old_dentry_dir,
- req->r_path2, req->r_ino2.ino,
- &path2, &pathlen2, &ino2, &freepath2, true);
+ req->r_old_dentry_dir,
+ req->r_path2, req->r_ino2.ino,
+ &path_info2, true);
if (ret < 0) {
msg = ERR_PTR(ret);
goto out_free1;
/* filepaths */
len += 2 * (1 + sizeof(u32) + sizeof(u64));
- len += pathlen1 + pathlen2;
+ len += path_info1.pathlen + path_info2.pathlen;
/* cap releases */
len += sizeof(struct ceph_mds_request_release) *
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
- len += pathlen1;
+ len += path_info1.pathlen;
if (req->r_old_dentry_drop)
- len += pathlen2;
+ len += path_info2.pathlen;
/* MClientRequest tail */
lhead->ino = cpu_to_le64(req->r_deleg_ino);
lhead->args = req->r_args;
- ceph_encode_filepath(&p, end, ino1, path1);
- ceph_encode_filepath(&p, end, ino2, path2);
+ ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path);
+ ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path);
/* make note of release offset, in case we need to replay */
req->r_request_release_offset = p - msg->front.iov_base;
msg->hdr.data_off = cpu_to_le16(0);
out_free2:
- if (freepath2)
- ceph_mdsc_free_path((char *)path2, pathlen2);
+ ceph_mdsc_free_path_info(&path_info2);
out_free1:
- if (freepath1)
- ceph_mdsc_free_path((char *)path1, pathlen1);
+ ceph_mdsc_free_path_info(&path_info1);
out:
return msg;
out_err:
struct ceph_pagelist *pagelist = recon_state->pagelist;
struct dentry *dentry;
struct ceph_cap *cap;
- char *path;
- int pathlen = 0, err;
- u64 pathbase;
+ struct ceph_path_info path_info = {0};
+ int err;
u64 snap_follows;
dentry = d_find_primary(inode);
if (dentry) {
/* set pathbase to parent dir when msg_version >= 2 */
- path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase,
+ char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info,
recon_state->msg_version >= 2);
dput(dentry);
if (IS_ERR(path)) {
err = PTR_ERR(path);
goto out_err;
}
- } else {
- path = NULL;
- pathbase = 0;
}
spin_lock(&ci->i_ceph_lock);
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v2.issued = cpu_to_le32(cap->issued);
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v2.pathbase = cpu_to_le64(pathbase);
+ rec.v2.pathbase = cpu_to_le64(path_info.vino.ino);
rec.v2.flock_len = (__force __le32)
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
} else {
ts = inode_get_atime(inode);
ceph_encode_timespec64(&rec.v1.atime, &ts);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
- rec.v1.pathbase = cpu_to_le64(pathbase);
+ rec.v1.pathbase = cpu_to_le64(path_info.vino.ino);
}
if (list_empty(&ci->i_cap_snaps)) {
sizeof(struct ceph_filelock);
rec.v2.flock_len = cpu_to_le32(struct_len);
- struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
+ struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2);
if (struct_v >= 2)
struct_len += sizeof(u64); /* snap_follows */
ceph_pagelist_encode_8(pagelist, 1);
ceph_pagelist_encode_32(pagelist, struct_len);
}
- ceph_pagelist_encode_string(pagelist, path, pathlen);
+ ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
ceph_locks_to_pagelist(flocks, pagelist,
num_fcntl_locks, num_flock_locks);
} else {
err = ceph_pagelist_reserve(pagelist,
sizeof(u64) + sizeof(u32) +
- pathlen + sizeof(rec.v1));
+ path_info.pathlen + sizeof(rec.v1));
if (err)
goto out_err;
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
- ceph_pagelist_encode_string(pagelist, path, pathlen);
+ ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
}
out_err:
- ceph_mdsc_free_path(path, pathlen);
+ ceph_mdsc_free_path_info(&path_info);
if (!err)
recon_state->nr_caps++;
return err;