NFSv4.1/pnfs: Fix a close/delegreturn hang when return-on-close is set
[linux-2.6-block.git] / fs / nfs / pnfs.c
index 0ba9a02c95664960f8c0f46ea97249bd8653fe16..6aabbb6540216f9d23ff54cd79fcd402b2dd6e7a 100644 (file)
@@ -352,7 +352,7 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
 {
        struct pnfs_layout_segment *s;
 
-       if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
                return false;
 
        list_for_each_entry(s, &lo->plh_segs, pls_list)
@@ -362,6 +362,17 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
        return true;
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{ /* visible callers invoke this with inode->i_lock held */
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false; /* bit already set; callers skip the send */
+       lo->plh_return_iomode = 0;
+       pnfs_get_layout_hdr(lo); /* ref taken only on success */
+       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
+       return true; /* caller is expected to send the LAYOUTRETURN */
+}
+
 static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
                struct pnfs_layout_hdr *lo, struct inode *inode)
 {
@@ -372,17 +383,16 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
        if (pnfs_layout_need_return(lo, lseg)) {
                nfs4_stateid stateid;
                enum pnfs_iomode iomode;
+               bool send;
 
                stateid = lo->plh_stateid;
                iomode = lo->plh_return_iomode;
-               /* decreased in pnfs_send_layoutreturn() */
-               lo->plh_block_lgets++;
-               lo->plh_return_iomode = 0;
+               send = pnfs_prepare_layoutreturn(lo);
                spin_unlock(&inode->i_lock);
-               pnfs_get_layout_hdr(lo);
-
-               /* Send an async layoutreturn so we dont deadlock */
-               pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               if (send) {
+                       /* Send an async layoutreturn so we don't deadlock */
+                       pnfs_send_layoutreturn(lo, stateid, iomode, false);
+               }
        } else
                spin_unlock(&inode->i_lock);
 }
@@ -411,6 +421,10 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
        if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+                       spin_unlock(&inode->i_lock);
+                       return;
+               }
                pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                spin_unlock(&inode->i_lock);
@@ -451,6 +465,8 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
                test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
        if (atomic_dec_and_test(&lseg->pls_refcount)) {
                struct pnfs_layout_hdr *lo = lseg->pls_layout;
+               if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags))
+                       return;
                pnfs_get_layout_hdr(lo);
                pnfs_layout_remove_lseg(lo, lseg);
                pnfs_free_lseg_async(lseg);
@@ -800,25 +816,12 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
        return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
 }
 
-static bool
-pnfs_layout_returning(const struct pnfs_layout_hdr *lo,
-                     struct pnfs_layout_range *range)
-{
-       return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
-               (lo->plh_return_iomode == IOMODE_ANY ||
-                lo->plh_return_iomode == range->iomode);
-}
-
-/* lget is set to 1 if called from inside send_layoutget call chain */
+/* True if plh_block_lgets is non-zero or NFS_LAYOUT_BULK_RECALL is set */
 static bool
-pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_range *range, int lget)
+pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)
 {
        return lo->plh_block_lgets ||
-               test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
-               (list_empty(&lo->plh_segs) &&
-                (atomic_read(&lo->plh_outstanding) > lget)) ||
-               pnfs_layout_returning(lo, range);
+               test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 }
 
 int
@@ -830,7 +833,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
 
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
-       if (pnfs_layoutgets_blocked(lo, range, 1)) {
+       if (pnfs_layoutgets_blocked(lo)) {
                status = -EAGAIN;
        } else if (!nfs4_valid_open_stateid(open_state)) {
                status = -EBADF;
@@ -924,6 +927,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags);
        smp_mb__after_atomic();
        wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
+       rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
 static int
@@ -938,9 +942,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
        if (unlikely(lrp == NULL)) {
                status = -ENOMEM;
                spin_lock(&ino->i_lock);
-               lo->plh_block_lgets--;
                pnfs_clear_layoutreturn_waitbit(lo);
-               rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
                spin_unlock(&ino->i_lock);
                pnfs_put_layout_hdr(lo);
                goto out;
@@ -978,6 +980,7 @@ _pnfs_return_layout(struct inode *ino)
        LIST_HEAD(tmp_list);
        nfs4_stateid stateid;
        int status = 0, empty;
+       bool send;
 
        dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
 
@@ -1007,17 +1010,18 @@ _pnfs_return_layout(struct inode *ino)
        /* Don't send a LAYOUTRETURN if list was initially empty */
        if (empty) {
                spin_unlock(&ino->i_lock);
-               pnfs_put_layout_hdr(lo);
                dprintk("NFS: %s no layout segments to return\n", __func__);
-               goto out;
+               goto out_put_layout_hdr;
        }
 
        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-       lo->plh_block_lgets++;
+       send = pnfs_prepare_layoutreturn(lo);
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
-
-       status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+       if (send)
+               status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+out_put_layout_hdr:
+       pnfs_put_layout_hdr(lo);
 out:
        dprintk("<-- %s status: %d\n", __func__, status);
        return status;
@@ -1087,7 +1091,9 @@ bool pnfs_roc(struct inode *ino)
                }
        if (!found)
                goto out_noroc;
-       lo->plh_block_lgets++;
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               goto out_noroc;
+       lo->plh_return_iomode = IOMODE_ANY;
        pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
@@ -1097,13 +1103,9 @@ bool pnfs_roc(struct inode *ino)
 out_noroc:
        if (lo) {
                stateid = lo->plh_stateid;
-               layoutreturn =
-                       test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-               if (layoutreturn) {
-                       lo->plh_block_lgets++;
-                       pnfs_get_layout_hdr(lo);
-               }
+               if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                                          &lo->plh_flags))
+                       layoutreturn = pnfs_prepare_layoutreturn(lo);
        }
        spin_unlock(&ino->i_lock);
        if (layoutreturn) {
@@ -1119,7 +1121,7 @@ void pnfs_roc_release(struct inode *ino)
 
        spin_lock(&ino->i_lock);
        lo = NFS_I(ino)->layout;
-       lo->plh_block_lgets--;
+       pnfs_clear_layoutreturn_waitbit(lo);
        if (atomic_dec_and_test(&lo->plh_refcount)) {
                pnfs_detach_layout_hdr(lo);
                spin_unlock(&ino->i_lock);
@@ -1139,22 +1141,14 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
        spin_unlock(&ino->i_lock);
 }
 
-bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
+void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier)
 {
        struct nfs_inode *nfsi = NFS_I(ino);
        struct pnfs_layout_hdr *lo;
-       struct pnfs_layout_segment *lseg;
        nfs4_stateid stateid;
        u32 current_seqid;
-       bool found = false, layoutreturn = false;
 
        spin_lock(&ino->i_lock);
-       list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
-               if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
-                       rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
-                       found = true;
-                       goto out;
-               }
        lo = nfsi->layout;
        current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
 
@@ -1162,23 +1156,8 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
         * a barrier, we choose the worst-case barrier.
         */
        *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
-out:
-       if (!found) {
-               stateid = lo->plh_stateid;
-               layoutreturn =
-                       test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
-                                          &lo->plh_flags);
-               if (layoutreturn) {
-                       lo->plh_block_lgets++;
-                       pnfs_get_layout_hdr(lo);
-               }
-       }
+       stateid = lo->plh_stateid;
        spin_unlock(&ino->i_lock);
-       if (layoutreturn) {
-               rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
-               pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false);
-       }
-       return found;
 }
 
 /*
@@ -1423,6 +1402,8 @@ static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key)
 
 static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
 {
+       if (!pnfs_should_retry_layoutget(lo))
+               return false;
        /*
         * send layoutcommit as it can hold up layoutreturn due to lseg
         * reference
@@ -1518,8 +1499,7 @@ lookup_again:
         * Because we free lsegs before sending LAYOUTRETURN, we need to wait
         * for LAYOUTRETURN even if first is true.
         */
-       if (!lseg && pnfs_should_retry_layoutget(lo) &&
-           test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
+       if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
                spin_unlock(&ino->i_lock);
                dprintk("%s wait for layoutreturn\n", __func__);
                if (pnfs_prepare_to_retry_layoutget(lo)) {
@@ -1532,7 +1512,7 @@ lookup_again:
                goto out_put_layout_hdr;
        }
 
-       if (pnfs_layoutgets_blocked(lo, &arg, 0))
+       if (pnfs_layoutgets_blocked(lo))
                goto out_unlock;
        atomic_inc(&lo->plh_outstanding);
        spin_unlock(&ino->i_lock);
@@ -1604,12 +1584,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
        lseg->pls_range = res->range;
 
        spin_lock(&ino->i_lock);
-       if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
-               dprintk("%s forget reply due to recall\n", __func__);
-               goto out_forget_reply;
-       }
-
-       if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) {
+       if (pnfs_layoutgets_blocked(lo)) {
                dprintk("%s forget reply due to state\n", __func__);
                goto out_forget_reply;
        }
@@ -1695,7 +1670,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
        spin_lock(&inode->i_lock);
        /* set failure bit so that pnfs path will be retried later */
        pnfs_layout_set_fail_bit(lo, iomode);
-       set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
        if (lo->plh_return_iomode == 0)
                lo->plh_return_iomode = range.iomode;
        else if (lo->plh_return_iomode != range.iomode)
@@ -2207,13 +2181,12 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        if (ld->prepare_layoutcommit) {
                status = ld->prepare_layoutcommit(&data->args);
                if (status) {
+                       put_rpccred(data->cred);
                        spin_lock(&inode->i_lock);
                        set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
                        if (end_pos > nfsi->layout->plh_lwb)
                                nfsi->layout->plh_lwb = end_pos;
-                       spin_unlock(&inode->i_lock);
-                       put_rpccred(data->cred);
-                       goto clear_layoutcommitting;
+                       goto out_unlock;
                }
        }
 
@@ -2254,7 +2227,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
 
 #if IS_ENABLED(CONFIG_NFS_V4_2)
 int
-pnfs_report_layoutstat(struct inode *inode)
+pnfs_report_layoutstat(struct inode *inode, gfp_t gfp_flags)
 {
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
        struct nfs_server *server = NFS_SERVER(inode);
@@ -2281,7 +2254,7 @@ pnfs_report_layoutstat(struct inode *inode)
        pnfs_get_layout_hdr(hdr);
        spin_unlock(&inode->i_lock);
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), gfp_flags);
        if (!data) {
                status = -ENOMEM;
                goto out_put;