ceph: remove the capsnaps when removing caps
authorXiubo Li <xiubli@redhat.com>
Wed, 25 Aug 2021 13:45:43 +0000 (21:45 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 26 Sep 2021 12:09:00 +0000 (14:09 +0200)
[ Upstream commit a6d37ccdd240e80f26aaea0e62cda310e0e184d7 ]

capsnaps will take inode references via ihold when queueing to flush.
When force unmounting, the client will just close the sessions and
may never get a flush reply, causing a leak and inode ref leak.

Fix this by removing the capsnaps for an inode when removing the caps.

URL: https://tracker.ceph.com/issues/52295
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/ceph/super.h

index 678dac8365ed3248a58846105ac9332d0f8681d0..f303e0d87c3f64ee60698394210909813ccb0614 100644 (file)
@@ -3169,7 +3169,16 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                                break;
                        }
                }
-               BUG_ON(!found);
+
+               if (!found) {
+                       /*
+                        * The capsnap should already be removed when removing
+                        * auth cap in the case of a forced unmount.
+                        */
+                       WARN_ON_ONCE(ci->i_auth_cap);
+                       goto unlock;
+               }
+
                capsnap->dirty_pages -= nr;
                if (capsnap->dirty_pages == 0) {
                        complete_capsnap = true;
@@ -3191,6 +3200,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                     complete_capsnap ? " (complete capsnap)" : "");
        }
 
+unlock:
        spin_unlock(&ci->i_ceph_lock);
 
        if (last) {
@@ -3657,6 +3667,43 @@ out:
                iput(inode);
 }
 
+void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+                          bool *wake_ci, bool *wake_mdsc)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+       bool ret;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
+
+       dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
+
+       list_del_init(&capsnap->ci_item);
+       ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
+       if (wake_ci)
+               *wake_ci = ret;
+
+       spin_lock(&mdsc->cap_dirty_lock);
+       if (list_empty(&ci->i_cap_flush_list))
+               list_del_init(&ci->i_flushing_item);
+
+       ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
+       if (wake_mdsc)
+               *wake_mdsc = ret;
+       spin_unlock(&mdsc->cap_dirty_lock);
+}
+
+void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
+                        bool *wake_ci, bool *wake_mdsc)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+
+       lockdep_assert_held(&ci->i_ceph_lock);
+
+       WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
+       __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
+}
+
 /*
  * Handle FLUSHSNAP_ACK.  MDS has flushed snap data to disk and we can
  * throw away our cap_snap.
@@ -3694,23 +3741,10 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
                             capsnap, capsnap->follows);
                }
        }
-       if (flushed) {
-               WARN_ON(capsnap->dirty_pages || capsnap->writing);
-               dout(" removing %p cap_snap %p follows %lld\n",
-                    inode, capsnap, follows);
-               list_del(&capsnap->ci_item);
-               wake_ci |= __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
-
-               spin_lock(&mdsc->cap_dirty_lock);
-
-               if (list_empty(&ci->i_cap_flush_list))
-                       list_del_init(&ci->i_flushing_item);
-
-               wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc,
-                                                         &capsnap->cap_flush);
-               spin_unlock(&mdsc->cap_dirty_lock);
-       }
+       if (flushed)
+               ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
        spin_unlock(&ci->i_ceph_lock);
+
        if (flushed) {
                ceph_put_snap_context(capsnap->context);
                ceph_put_cap_snap(capsnap);
index 46606fb5b886c1850afdcf4ed9a143ce4ca6e831..0f57b7d094578d0cf50f11ae63b7a807687029ad 100644 (file)
@@ -1587,14 +1587,39 @@ out:
        return ret;
 }
 
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_cap_snap *capsnap;
+       int capsnap_release = 0;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
+
+       dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+       while (!list_empty(&ci->i_cap_snaps)) {
+               capsnap = list_first_entry(&ci->i_cap_snaps,
+                                          struct ceph_cap_snap, ci_item);
+               __ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+               ceph_put_snap_context(capsnap->context);
+               ceph_put_cap_snap(capsnap);
+               capsnap_release++;
+       }
+       wake_up_all(&ci->i_cap_wq);
+       wake_up_all(&mdsc->cap_flushing_wq);
+       return capsnap_release;
+}
+
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                  void *arg)
 {
        struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
+       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
        LIST_HEAD(to_remove);
        bool dirty_dropped = false;
        bool invalidate = false;
+       int capsnap_release = 0;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
             cap, ci, &ci->vfs_inode);
@@ -1602,7 +1627,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
        __ceph_remove_cap(cap, false);
        if (!ci->i_auth_cap) {
                struct ceph_cap_flush *cf;
-               struct ceph_mds_client *mdsc = fsc->mdsc;
 
                if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
                        if (inode->i_data.nrpages > 0)
@@ -1666,6 +1690,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
                        ci->i_prealloc_cap_flush = NULL;
                }
+
+               if (!list_empty(&ci->i_cap_snaps))
+                       capsnap_release = remove_capsnaps(mdsc, inode);
        }
        spin_unlock(&ci->i_ceph_lock);
        while (!list_empty(&to_remove)) {
@@ -1682,6 +1709,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                ceph_queue_invalidate(inode);
        if (dirty_dropped)
                iput(inode);
+       while (capsnap_release--)
+               iput(inode);
        return 0;
 }
 
index a8c460393b01bd8361beafe928e5a3c3491d976e..9362eeb5812d9358359d9f7fec56d5cb6f1f5af1 100644 (file)
@@ -1134,6 +1134,12 @@ extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
                                            int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
                                       struct ceph_snap_context *snapc);
+extern void __ceph_remove_capsnap(struct inode *inode,
+                                 struct ceph_cap_snap *capsnap,
+                                 bool *wake_ci, bool *wake_mdsc);
+extern void ceph_remove_capsnap(struct inode *inode,
+                               struct ceph_cap_snap *capsnap,
+                               bool *wake_ci, bool *wake_mdsc);
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
                             struct ceph_mds_session **psession);
 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);