pNFS: Fix races between return-on-close and layoutreturn.
author Trond Myklebust <trond.myklebust@primarydata.com>
Thu, 9 Jul 2015 16:40:01 +0000 (18:40 +0200)
committer Trond Myklebust <trond.myklebust@primarydata.com>
Sat, 11 Jul 2015 14:16:16 +0000 (16:16 +0200)
If one or more of the layout segments reports an error during I/O, then
we may have to send a layoutreturn to report the error back to the NFS
metadata server.
This patch ensures that the return-on-close code can detect the
outstanding layoutreturn, and not preempt it.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
fs/nfs/nfs4proc.c
fs/nfs/pnfs.c

index 671498ca36d761a3c2f2de7e22fad98d0ca2c1b7..c5c9e0d070f858de749f44098e4b4da11c345cdf 100644 (file)
@@ -7972,8 +7972,6 @@ static void nfs4_layoutreturn_release(void *calldata)
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
        pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
        pnfs_clear_layoutreturn_waitbit(lo);
-       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-       rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
        lo->plh_block_lgets--;
        spin_unlock(&lo->plh_inode->i_lock);
        pnfs_free_lseg_list(&freeme);
index 8e9f467e409c1ab909bef62a829c6363d8b2bb4f..27e2bcaa88da766522b69c9f379ea5c4f5cff5da 100644 (file)
@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
 {
        struct pnfs_layout_segment *s;
 
-       if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
                return false;
 
        list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -362,6 +362,18 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
        return true;
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       lo->plh_block_lgets++;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
+       return true;
+}
+
 static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
                struct pnfs_layout_hdr *lo, struct inode *inode)
 {
@@ -372,17 +384,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
        if (pnfs_layout_need_return(lo, lseg)) {
                nfs4_stateid stateid;
                enum pnfs_iomode iomode;
+               bool send;
 
                stateid = lo->plh_stateid;
                iomode = lo->plh_return_iomode;
-               /* decreased in pnfs_send_layoutreturn() */
-               lo->plh_block_lgets++;
-               lo->plh_return_iomode = 0;
+               send = pnfs_prepare_layoutreturn(lo);
                spin_unlock(&inode->i_lock);
-               pnfs_get_layout_hdr(lo);
-
-               /* Send an async layoutreturn so we dont deadlock */
-               pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               if (send) {
+                       /* Send an async layoutreturn so we dont deadlock */
+                       pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               }
        } else
                spin_unlock(&inode->i_lock);
 }
@@ -924,6 +935,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
        smp_mb__after_atomic();
        wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
+       rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
 static int
@@ -978,6 +990,7 @@ _pnfs_return_layout(struct inode *ino)
        LIST_HEAD(tmp_list);
        nfs4_stateid stateid;
        int status = 0, empty;
+       bool send;
 
        dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
 
@@ -1007,17 +1020,18 @@ _pnfs_return_layout(struct inode *ino)
        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (empty) {
                spin_unlock(&ino->i_lock);
-               pnfs_put_layout_hdr(lo);
                dprintk("NFS: %s no layout segments to return\n", __func__);
-               goto out;
+               goto out_put_layout_hdr;
        }
 
        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-       lo->plh_block_lgets++;
+       send = pnfs_prepare_layoutreturn(lo);
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
-
-       status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+       if (send)
+               status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+out_put_layout_hdr:
+       pnfs_put_layout_hdr(lo);
 out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
@@ -1097,13 +1111,9 @@ bool pnfs_roc(struct inode *ino)
 out_noroc:
        if (lo) {
                stateid = lo->plh_stateid;
-               layoutreturn =
-                       test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-               if (layoutreturn) {
-                       lo->plh_block_lgets++;
-                       pnfs_get_layout_hdr(lo);
-               }
+               if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                                          &lo->plh_flags))
+                       layoutreturn = pnfs_prepare_layoutreturn(lo);
        }
        spin_unlock(&ino->i_lock);
        if (layoutreturn) {
@@ -1163,16 +1173,14 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
         */
        *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
        stateid = lo->plh_stateid;
-       layoutreturn = test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-       if (layoutreturn) {
-               lo->plh_block_lgets++;
-               pnfs_get_layout_hdr(lo);
-       }
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                                          &lo->plh_flags))
+               layoutreturn = pnfs_prepare_layoutreturn(lo);
+       if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
 
        spin_unlock(&ino->i_lock);
        if (layoutreturn) {
-               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
                pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
                return true;
        }
@@ -1693,7 +1701,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
        spin_lock(&inode->i_lock);
        /* set failure bit so that pnfs path will be retried later */
        pnfs_layout_set_fail_bit(lo, iomode);
-       set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
        if (lo->plh_return_iomode == 0)
                lo->plh_return_iomode = range.iomode;
        else if (lo->plh_return_iomode != range.iomode)