libceph: make sure need_resend targets reflect latest map
author Ilya Dryomov <idryomov@gmail.com>
Thu, 15 Jun 2017 14:30:55 +0000 (16:30 +0200)
committer Ilya Dryomov <idryomov@gmail.com>
Fri, 7 Jul 2017 15:25:16 +0000 (17:25 +0200)
Otherwise we may miss events like PG splits, pool deletions, etc., when
we get multiple incremental maps at once.  Because check_pool_dne() can
now be fed an unlinked request, finish_request() needed to be taught to
handle unlinked requests.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
include/linux/ceph/osd_client.h
net/ceph/debugfs.c
net/ceph/osd_client.c

index bca2718ac25370988f26cac21f7aa2767a8babca..62c672bcbb31a861bb2ed289b105078f3db5d0d3 100644 (file)
@@ -149,6 +149,7 @@ struct ceph_osd_request_target {
        unsigned int flags;                /* CEPH_OSD_FLAG_* */
        bool paused;
 
+       u32 epoch;
        u32 last_force_resend;
 
        int osd;
index 50ab1bdb16e22237bef6bc202f2befa75210e8cd..c0089f8ccaeb98fee2d1560fc3a17d8733cc7914 100644 (file)
@@ -166,7 +166,7 @@ static void dump_target(struct seq_file *s, struct ceph_osd_request_target *t)
        seq_printf(s, "]/%d\t[", t->up.primary);
        for (i = 0; i < t->acting.size; i++)
                seq_printf(s, "%s%d", (!i ? "" : ","), t->acting.osds[i]);
-       seq_printf(s, "]/%d\t", t->acting.primary);
+       seq_printf(s, "]/%d\te%u\t", t->acting.primary, t->epoch);
        if (t->target_oloc.pool_ns) {
                seq_printf(s, "%*pE/%*pE\t0x%x",
                        (int)t->target_oloc.pool_ns->len,
index 576101b635ef71ded2810985e962c464b31285e1..173ab9c68eb6c3a5bcfc29170b53e1b69ab74ea8 100644 (file)
@@ -386,6 +386,7 @@ static void target_copy(struct ceph_osd_request_target *dest,
        dest->flags = src->flags;
        dest->paused = src->paused;
 
+       dest->epoch = src->epoch;
        dest->last_force_resend = src->last_force_resend;
 
        dest->osd = src->osd;
@@ -1334,6 +1335,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
        enum calc_target_result ct_res;
        int ret;
 
+       t->epoch = osdc->osdmap->epoch;
        pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
        if (!pi) {
                t->osd = CEPH_HOMELESS_OSD;
@@ -1720,10 +1722,11 @@ static void send_request(struct ceph_osd_request *req)
 
        encode_request_partial(req, req->r_request);
 
-       dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d flags 0x%x attempt %d\n",
+       dout("%s req %p tid %llu to pgid %llu.%x spgid %llu.%xs%d osd%d e%u flags 0x%x attempt %d\n",
             __func__, req, req->r_tid, req->r_t.pgid.pool, req->r_t.pgid.seed,
             req->r_t.spgid.pgid.pool, req->r_t.spgid.pgid.seed,
-            req->r_t.spgid.shard, osd->o_osd, req->r_flags, req->r_attempts);
+            req->r_t.spgid.shard, osd->o_osd, req->r_t.epoch, req->r_flags,
+            req->r_attempts);
 
        req->r_t.paused = false;
        req->r_stamp = jiffies;
@@ -1863,13 +1866,12 @@ static void submit_request(struct ceph_osd_request *req, bool wrlocked)
 static void finish_request(struct ceph_osd_request *req)
 {
        struct ceph_osd_client *osdc = req->r_osdc;
-       struct ceph_osd *osd = req->r_osd;
 
-       verify_osd_locked(osd);
+       WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
        dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
-       WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
-       unlink_request(osd, req);
+       if (req->r_osd)
+               unlink_request(req->r_osd, req);
        atomic_dec(&osdc->num_requests);
 
        /*
@@ -3356,8 +3358,25 @@ static void kick_requests(struct ceph_osd_client *osdc,
                          struct list_head *need_resend_linger)
 {
        struct ceph_osd_linger_request *lreq, *nlreq;
+       enum calc_target_result ct_res;
        struct rb_node *n;
 
+       /* make sure need_resend targets reflect latest map */
+       for (n = rb_first(need_resend); n; ) {
+               struct ceph_osd_request *req =
+                   rb_entry(n, struct ceph_osd_request, r_node);
+
+               n = rb_next(n);
+
+               if (req->r_t.epoch < osdc->osdmap->epoch) {
+                       ct_res = calc_target(osdc, &req->r_t, NULL, false);
+                       if (ct_res == CALC_TARGET_POOL_DNE) {
+                               erase_request(need_resend, req);
+                               check_pool_dne(req);
+                       }
+               }
+       }
+
        for (n = rb_first(need_resend); n; ) {
                struct ceph_osd_request *req =
                    rb_entry(n, struct ceph_osd_request, r_node);
@@ -3366,8 +3385,6 @@ static void kick_requests(struct ceph_osd_client *osdc,
                n = rb_next(n);
                erase_request(need_resend, req); /* before link_request() */
 
-               WARN_ON(req->r_osd);
-               calc_target(osdc, &req->r_t, NULL, false);
                osd = lookup_create_osd(osdc, req->r_t.osd, true);
                link_request(osd, req);
                if (!req->r_linger) {