ceph: track and report error of async metadata operation
author Yan, Zheng <zyan@redhat.com>
Thu, 25 Jul 2019 12:16:42 +0000 (20:16 +0800)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 16 Sep 2019 10:06:23 +0000 (12:06 +0200)
Use errseq_t to track and report errors of async metadata operations,
similar to how the kernel handles errors during writeback.

If any dirty caps or unsafe requests get dropped during session
eviction, record -EIO in the corresponding inode's i_meta_err. The error
will be reported by a subsequent fsync.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/caps.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/super.h

index ce0f5658720ab9d06764c1f63dbe3d267ffb336f..321ba9b30968bbd6d875c0ff00eb8d33555bc779 100644 (file)
@@ -2261,35 +2261,45 @@ static int unsafe_request_wait(struct inode *inode)
 
 int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
+       struct ceph_file_info *fi = file->private_data;
        struct inode *inode = file->f_mapping->host;
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 flush_tid;
-       int ret;
+       int ret, err;
        int dirty;
 
        dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
 
        ret = file_write_and_wait_range(file, start, end);
-       if (ret < 0)
-               goto out;
-
        if (datasync)
                goto out;
 
        dirty = try_flush_caps(inode, &flush_tid);
        dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
 
-       ret = unsafe_request_wait(inode);
+       err = unsafe_request_wait(inode);
 
        /*
         * only wait on non-file metadata writeback (the mds
         * can recover size and mtime, so we don't need to
         * wait for that)
         */
-       if (!ret && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
-               ret = wait_event_interruptible(ci->i_cap_wq,
+       if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
+               err = wait_event_interruptible(ci->i_cap_wq,
                                        caps_are_flushed(inode, flush_tid));
        }
+
+       if (err < 0)
+               ret = err;
+
+       if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) {
+               spin_lock(&file->f_lock);
+               err = errseq_check_and_advance(&ci->i_meta_err,
+                                              &fi->meta_err);
+               spin_unlock(&file->f_lock);
+               if (err < 0)
+                       ret = err;
+       }
 out:
        dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
        return ret;
index f657fabcb3eeadf45b4e3f5df210c5fbf0588450..75a1d12ec46def866ed1b8408903d3f5fe77fb43 100644 (file)
@@ -201,6 +201,7 @@ out:
 static int ceph_init_file_info(struct inode *inode, struct file *file,
                                        int fmode, bool isdir)
 {
+       struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_file_info *fi;
 
        dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
@@ -211,7 +212,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
                struct ceph_dir_file_info *dfi =
                        kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
                if (!dfi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -222,7 +223,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        } else {
                fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
                if (!fi) {
-                       ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+                       ceph_put_fmode(ci, fmode); /* clean up */
                        return -ENOMEM;
                }
 
@@ -232,6 +233,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
        fi->fmode = fmode;
        spin_lock_init(&fi->rw_contexts_lock);
        INIT_LIST_HEAD(&fi->rw_contexts);
+       fi->meta_err = errseq_sample(&ci->i_meta_err);
 
        return 0;
 }
index 3b537e7038c7a4a9e97413bbe7e2665971bcafab..332433b490f5ab52e9fa35231519ee3cfc6d80db 100644 (file)
@@ -515,6 +515,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
        ceph_fscache_inode_init(ci);
 
+       ci->i_meta_err = 0;
+
        return &ci->vfs_inode;
 }
 
index 920e9f048bd8f4b38e26da4cb53ce5d9fb6bcda4..df4bea231017a2e1df77a4d739eec60022f90fee 100644 (file)
@@ -1270,6 +1270,7 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
 {
        struct ceph_mds_request *req;
        struct rb_node *p;
+       struct ceph_inode_info *ci;
 
        dout("cleanup_session_requests mds%d\n", session->s_mds);
        mutex_lock(&mdsc->mutex);
@@ -1278,6 +1279,16 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
                                       struct ceph_mds_request, r_unsafe_item);
                pr_warn_ratelimited(" dropping unsafe request %llu\n",
                                    req->r_tid);
+               if (req->r_target_inode) {
+                       /* dropping unsafe change of inode's attributes */
+                       ci = ceph_inode(req->r_target_inode);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
+               if (req->r_unsafe_dir) {
+                       /* dropping unsafe directory operation */
+                       ci = ceph_inode(req->r_unsafe_dir);
+                       errseq_set(&ci->i_meta_err, -EIO);
+               }
                __unregister_request(mdsc, req);
        }
        /* zero r_attempts, so kick_requests() will re-send requests */
@@ -1370,7 +1381,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
        struct ceph_inode_info *ci = ceph_inode(inode);
        LIST_HEAD(to_remove);
-       bool drop = false;
+       bool dirty_dropped = false;
        bool invalidate = false;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
@@ -1405,7 +1416,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                inode, ceph_ino(inode));
                        ci->i_dirty_caps = 0;
                        list_del_init(&ci->i_dirty_item);
-                       drop = true;
+                       dirty_dropped = true;
                }
                if (!list_empty(&ci->i_flushing_item)) {
                        pr_warn_ratelimited(
@@ -1415,10 +1426,22 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        ci->i_flushing_caps = 0;
                        list_del_init(&ci->i_flushing_item);
                        mdsc->num_cap_flushing--;
-                       drop = true;
+                       dirty_dropped = true;
                }
                spin_unlock(&mdsc->cap_dirty_lock);
 
+               if (dirty_dropped) {
+                       errseq_set(&ci->i_meta_err, -EIO);
+
+                       if (ci->i_wrbuffer_ref_head == 0 &&
+                           ci->i_wr_ref == 0 &&
+                           ci->i_dirty_caps == 0 &&
+                           ci->i_flushing_caps == 0) {
+                               ceph_put_snap_context(ci->i_head_snapc);
+                               ci->i_head_snapc = NULL;
+                       }
+               }
+
                if (atomic_read(&ci->i_filelock_ref) > 0) {
                        /* make further file lock syscall return -EIO */
                        ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
@@ -1430,15 +1453,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                        ci->i_prealloc_cap_flush = NULL;
                }
-
-               if (drop &&
-                  ci->i_wrbuffer_ref_head == 0 &&
-                  ci->i_wr_ref == 0 &&
-                  ci->i_dirty_caps == 0 &&
-                  ci->i_flushing_caps == 0) {
-                      ceph_put_snap_context(ci->i_head_snapc);
-                      ci->i_head_snapc = NULL;
-               }
        }
        spin_unlock(&ci->i_ceph_lock);
        while (!list_empty(&to_remove)) {
@@ -1452,7 +1466,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        wake_up_all(&ci->i_cap_wq);
        if (invalidate)
                ceph_queue_invalidate(inode);
-       if (drop)
+       if (dirty_dropped)
                iput(inode);
        return 0;
 }
index 6b9f1ee7de8575a7db70fcf49fc16769df01fa3e..66dfe5ebf006b876bbfe713e453059957b7035df 100644 (file)
@@ -395,6 +395,8 @@ struct ceph_inode_info {
        struct fscache_cookie *fscache;
        u32 i_fscache_gen;
 #endif
+       errseq_t i_meta_err;
+
        struct inode vfs_inode; /* at end */
 };
 
@@ -703,6 +705,8 @@ struct ceph_file_info {
 
        spinlock_t rw_contexts_lock;
        struct list_head rw_contexts;
+
+       errseq_t meta_err;
 };
 
 struct ceph_dir_file_info {