NFS/flexfiles: Speed up read failover when DSes are down
author:    Trond Myklebust <trond.myklebust@hammerspace.com>
           Thu, 14 Feb 2019 22:32:40 +0000 (17:32 -0500)
committer: Trond Myklebust <trond.myklebust@hammerspace.com>
           Sat, 2 Mar 2019 03:37:38 +0000 (22:37 -0500)
If we notice that a DS may be down, we should attempt to read from the
other mirrors first before we go back to retry the dead DS.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c

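diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 747ab2fafb69d358ea3cc4ea16de5ff16289d231..44ccfce3784e5d8f258e719456439ec5f9b95f39 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c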
@@ -788,10 +788,28 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
        }
 }
 
+static void
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+{
+       struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+       if (devid)
+               nfs4_mark_deviceid_unavailable(devid);
+}
+
+static void
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+{
+       struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
+
+       if (devid)
+               nfs4_mark_deviceid_available(devid);
+}
+
 static struct nfs4_pnfs_ds *
-ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
-                                 int start_idx,
-                                 int *best_idx)
+ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
+                            int start_idx, int *best_idx,
+                            bool check_device)
 {
        struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
        struct nfs4_ff_layout_mirror *mirror;
@@ -799,25 +817,53 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
        bool fail_return = false;
        int idx;
 
-       /* mirrors are sorted by efficiency */
+       /* mirrors are initially sorted by efficiency */
        for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
                if (idx+1 == fls->mirror_array_cnt)
-                       fail_return = true;
+                       fail_return = !check_device;
 
                mirror = FF_LAYOUT_COMP(lseg, idx);
-               if (ff_layout_test_devid_unavailable(&mirror->mirror_ds->id_node))
+               ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
+               if (!ds)
                        continue;
 
-               ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
-               if (ds) {
-                       *best_idx = idx;
-                       return ds;
-               }
+               if (check_device &&
+                   nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
+                       continue;
+
+               *best_idx = idx;
+               return ds;
        }
 
        return NULL;
 }
 
+static struct nfs4_pnfs_ds *
+ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
+                                int start_idx, int *best_idx)
+{
+       return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
+                                  int start_idx, int *best_idx)
+{
+       return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
+}
+
+static struct nfs4_pnfs_ds *
+ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
+                                 int start_idx, int *best_idx)
+{
+       struct nfs4_pnfs_ds *ds;
+
+       ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
+       if (ds)
+               return ds;
+       return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
+}
+
 static void
 ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio,
                      struct nfs_page *req,
@@ -1167,8 +1213,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 {
        int vers = clp->cl_nfs_mod->rpc_vers->number;
 
-       if (task->tk_status >= 0)
+       if (task->tk_status >= 0) {
+               ff_layout_mark_ds_reachable(lseg, idx);
                return 0;
+       }
 
        /* Handle the case of an invalid layout segment */
        if (!pnfs_is_valid_lseg(lseg))
@@ -1231,6 +1279,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
        err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
                                       mirror, offset, length, status, opnum,
                                       GFP_NOIO);
+       if (status == NFS4ERR_NXIO)
+               ff_layout_mark_ds_unreachable(lseg, idx);
        pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg);
        dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status);
 }
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 56659ccce1d8995ff5e100d23af387a4fe9f668e..c0420b979d882cbf0245ecd22df897c3236eba57 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -350,6 +350,7 @@ void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nf
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *,
                             const struct nfs4_deviceid *);
 bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
+void nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node);
 void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
 bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
 void nfs4_deviceid_purge_client(const struct nfs_client *);
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index ec6b607ff1d70f39d559a013ac46e22d411ac9d0..537b80d693f1ef8f9d71de664f9a87c5f0ca176a 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -283,6 +283,16 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 }
 EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
 
+void
+nfs4_mark_deviceid_available(struct nfs4_deviceid_node *node)
+{
+       if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+               clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
+               smp_mb__after_atomic();
+       }
+}
+EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_available);
+
 void
 nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
 {