ceph: add read/write latency metric support
authorXiubo Li <xiubli@redhat.com>
Fri, 20 Mar 2020 03:45:01 +0000 (23:45 -0400)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 1 Jun 2020 11:22:51 +0000 (13:22 +0200)
Calculate the latency for OSD read requests. Add a new r_end_latency
field to struct ceph_osd_request that will hold the time at which
the reply was received. Use that to calculate the RTT for each call,
and divide the sum of those by the number of calls to get the average RTT.

Keep a tally of RTT for OSD writes and number of calls to track average
latency of OSD writes.

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
fs/ceph/addr.c
fs/ceph/debugfs.c
fs/ceph/file.c
fs/ceph/metric.c
fs/ceph/metric.h
include/linux/ceph/osd_client.h
net/ceph/osd_client.c

index 6f4678d98df73f5edbddb7fe061a8593c3598194..01ad09733ac79735cea7c4e7235653921f684622 100644 (file)
 #include <linux/task_io_accounting_ops.h>
 #include <linux/signal.h>
 #include <linux/iversion.h>
+#include <linux/ktime.h>
 
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
+#include "metric.h"
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/striper.h>
 
@@ -216,6 +218,9 @@ static int ceph_sync_readpages(struct ceph_fs_client *fsc,
        if (!rc)
                rc = ceph_osdc_wait_request(osdc, req);
 
+       ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                req->r_end_latency, rc);
+
        ceph_osdc_put_request(req);
        dout("readpages result %d\n", rc);
        return rc;
@@ -299,6 +304,7 @@ static int ceph_readpage(struct file *filp, struct page *page)
 static void finish_read(struct ceph_osd_request *req)
 {
        struct inode *inode = req->r_inode;
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_osd_data *osd_data;
        int rc = req->r_result <= 0 ? req->r_result : 0;
        int bytes = req->r_result >= 0 ? req->r_result : 0;
@@ -336,6 +342,10 @@ unlock:
                put_page(page);
                bytes -= PAGE_SIZE;
        }
+
+       ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                req->r_end_latency, rc);
+
        kfree(osd_data->pages);
 }
 
@@ -643,6 +653,9 @@ static int ceph_sync_writepages(struct ceph_fs_client *fsc,
        if (!rc)
                rc = ceph_osdc_wait_request(osdc, req);
 
+       ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                 req->r_end_latency, rc);
+
        ceph_osdc_put_request(req);
        if (rc == 0)
                rc = len;
@@ -794,6 +807,9 @@ static void writepages_finish(struct ceph_osd_request *req)
                ceph_clear_error_write(ci);
        }
 
+       ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                 req->r_end_latency, rc);
+
        /*
         * We lost the cache cap, need to truncate the page before
         * it is unlocked, otherwise we'd truncate it later in the
@@ -1852,6 +1868,10 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
        err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
        if (!err)
                err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+       ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                 req->r_end_latency, err);
+
 out_put:
        ceph_osdc_put_request(req);
        if (err == -ECANCELED)
index 37b2775949adcf11b2fdabf279dfb45fc0d79665..30acbc7f9acdcb28e83ec5ceb53d3f5be405c0f5 100644 (file)
@@ -7,6 +7,8 @@
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/math64.h>
+#include <linux/ktime.h>
 
 #include <linux/ceph/libceph.h>
 #include <linux/ceph/mon_client.h>
@@ -18,6 +20,7 @@
 #ifdef CONFIG_DEBUG_FS
 
 #include "mds_client.h"
+#include "metric.h"
 
 static int mdsmap_show(struct seq_file *s, void *p)
 {
@@ -124,13 +127,51 @@ static int mdsc_show(struct seq_file *s, void *p)
        return 0;
 }
 
+#define CEPH_METRIC_SHOW(name, total, avg, min, max, sq) {             \
+       s64 _total, _avg, _min, _max, _sq, _st;                         \
+       _avg = ktime_to_us(avg);                                        \
+       _min = ktime_to_us(min == KTIME_MAX ? 0 : min);                 \
+       _max = ktime_to_us(max);                                        \
+       _total = total - 1;                                             \
+       _sq = _total > 0 ? DIV64_U64_ROUND_CLOSEST(sq, _total) : 0;     \
+       _st = int_sqrt64(_sq);                                          \
+       _st = ktime_to_us(_st);                                         \
+       seq_printf(s, "%-14s%-12lld%-16lld%-16lld%-16lld%lld\n",        \
+                  name, total, _avg, _min, _max, _st);                 \
+}
+
 static int metric_show(struct seq_file *s, void *p)
 {
        struct ceph_fs_client *fsc = s->private;
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_client_metric *m = &mdsc->metric;
        int i, nr_caps = 0;
-
+       s64 total, sum, avg, min, max, sq;
+
+       seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
+       seq_printf(s, "-----------------------------------------------------------------------------------\n");
+
+       spin_lock(&m->read_latency_lock);
+       total = m->total_reads;
+       sum = m->read_latency_sum;
+       avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+       min = m->read_latency_min;
+       max = m->read_latency_max;
+       sq = m->read_latency_sq_sum;
+       spin_unlock(&m->read_latency_lock);
+       CEPH_METRIC_SHOW("read", total, avg, min, max, sq);
+
+       spin_lock(&m->write_latency_lock);
+       total = m->total_writes;
+       sum = m->write_latency_sum;
+       avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+       min = m->write_latency_min;
+       max = m->write_latency_max;
+       sq = m->write_latency_sq_sum;
+       spin_unlock(&m->write_latency_lock);
+       CEPH_METRIC_SHOW("write", total, avg, min, max, sq);
+
+       seq_printf(s, "\n");
        seq_printf(s, "item          total           miss            hit\n");
        seq_printf(s, "-------------------------------------------------\n");
 
index afdfca965a7fc59a12befb6731409e4155a6081f..160644ddaeed70598622cf41706f2c801dae5cf6 100644 (file)
 #include <linux/writeback.h>
 #include <linux/falloc.h>
 #include <linux/iversion.h>
+#include <linux/ktime.h>
 
 #include "super.h"
 #include "mds_client.h"
 #include "cache.h"
 #include "io.h"
+#include "metric.h"
 
 static __le32 ceph_flags_sys2wire(u32 flags)
 {
@@ -906,6 +908,12 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                ret = ceph_osdc_start_request(osdc, req, false);
                if (!ret)
                        ret = ceph_osdc_wait_request(osdc, req);
+
+               ceph_update_read_latency(&fsc->mdsc->metric,
+                                        req->r_start_latency,
+                                        req->r_end_latency,
+                                        ret);
+
                ceph_osdc_put_request(req);
 
                i_size = i_size_read(inode);
@@ -1044,6 +1052,8 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        struct inode *inode = req->r_inode;
        struct ceph_aio_request *aio_req = req->r_priv;
        struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_client_metric *metric = &fsc->mdsc->metric;
 
        BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
        BUG_ON(!osd_data->num_bvecs);
@@ -1051,6 +1061,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
        dout("ceph_aio_complete_req %p rc %d bytes %u\n",
             inode, rc, osd_data->bvec_pos.iter.bi_size);
 
+       /* r_start_latency == 0 means the request was not submitted */
+       if (req->r_start_latency) {
+               if (aio_req->write)
+                       ceph_update_write_latency(metric, req->r_start_latency,
+                                                 req->r_end_latency, rc);
+               else
+                       ceph_update_read_latency(metric, req->r_start_latency,
+                                                req->r_end_latency, rc);
+       }
+
        if (rc == -EOLDSNAPC) {
                struct ceph_aio_work *aio_work;
                BUG_ON(!aio_req->write);
@@ -1179,6 +1199,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_client_metric *metric = &fsc->mdsc->metric;
        struct ceph_vino vino;
        struct ceph_osd_request *req;
        struct bio_vec *bvecs;
@@ -1295,6 +1316,13 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
                if (!ret)
                        ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 
+               if (write)
+                       ceph_update_write_latency(metric, req->r_start_latency,
+                                                 req->r_end_latency, ret);
+               else
+                       ceph_update_read_latency(metric, req->r_start_latency,
+                                                req->r_end_latency, ret);
+
                size = i_size_read(inode);
                if (!write) {
                        if (ret == -ENOENT)
@@ -1466,6 +1494,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
                if (!ret)
                        ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
 
+               ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+                                         req->r_end_latency, ret);
 out:
                ceph_osdc_put_request(req);
                if (ret != 0) {
index 2a4b7394eed4d3ecc5550114da808f33149b14b2..f5cf32e7789f2c6be6ca1a90b40b25a52d3a882b 100644 (file)
@@ -2,6 +2,7 @@
 
 #include <linux/types.h>
 #include <linux/percpu_counter.h>
+#include <linux/math64.h>
 
 #include "metric.h"
 
@@ -29,6 +30,20 @@ int ceph_metric_init(struct ceph_client_metric *m)
        if (ret)
                goto err_i_caps_mis;
 
+       spin_lock_init(&m->read_latency_lock);
+       m->read_latency_sq_sum = 0;
+       m->read_latency_min = KTIME_MAX;
+       m->read_latency_max = 0;
+       m->total_reads = 0;
+       m->read_latency_sum = 0;
+
+       spin_lock_init(&m->write_latency_lock);
+       m->write_latency_sq_sum = 0;
+       m->write_latency_min = KTIME_MAX;
+       m->write_latency_max = 0;
+       m->total_writes = 0;
+       m->write_latency_sum = 0;
+
        return 0;
 
 err_i_caps_mis:
@@ -51,3 +66,60 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
        percpu_counter_destroy(&m->d_lease_mis);
        percpu_counter_destroy(&m->d_lease_hit);
 }
+
+static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,
+                                   ktime_t *min, ktime_t *max,
+                                   ktime_t *sq_sump, ktime_t lat)
+{
+       ktime_t total, avg, sq, lsum;
+
+       total = ++(*totalp);
+       lsum = (*lsump += lat);
+
+       if (unlikely(lat < *min))
+               *min = lat;
+       if (unlikely(lat > *max))
+               *max = lat;
+
+       if (unlikely(total == 1))
+               return;
+
+       /* the sq is (lat - old_avg) * (lat - new_avg) */
+       avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1));
+       sq = lat - avg;
+       avg = DIV64_U64_ROUND_CLOSEST(lsum, total);
+       sq = sq * (lat - avg);
+       *sq_sump += sq;
+}
+
+void ceph_update_read_latency(struct ceph_client_metric *m,
+                             ktime_t r_start, ktime_t r_end,
+                             int rc)
+{
+       ktime_t lat = ktime_sub(r_end, r_start);
+
+       if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
+               return;
+
+       spin_lock(&m->read_latency_lock);
+       __update_latency(&m->total_reads, &m->read_latency_sum,
+                        &m->read_latency_min, &m->read_latency_max,
+                        &m->read_latency_sq_sum, lat);
+       spin_unlock(&m->read_latency_lock);
+}
+
+void ceph_update_write_latency(struct ceph_client_metric *m,
+                              ktime_t r_start, ktime_t r_end,
+                              int rc)
+{
+       ktime_t lat = ktime_sub(r_end, r_start);
+
+       if (unlikely(rc && rc != -ETIMEDOUT))
+               return;
+
+       spin_lock(&m->write_latency_lock);
+       __update_latency(&m->total_writes, &m->write_latency_sum,
+                        &m->write_latency_min, &m->write_latency_max,
+                        &m->write_latency_sq_sum, lat);
+       spin_unlock(&m->write_latency_lock);
+}
index 098ee8a9adaff30c9e605b12795dfd106253e4d2..ab1543c70a757450a17a8f966fd251d0e2a49ecf 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/percpu_counter.h>
+#include <linux/ktime.h>
 
 /* This is the global metrics */
 struct ceph_client_metric {
@@ -13,6 +14,20 @@ struct ceph_client_metric {
 
        struct percpu_counter i_caps_hit;
        struct percpu_counter i_caps_mis;
+
+       spinlock_t read_latency_lock;
+       u64 total_reads;
+       ktime_t read_latency_sum;
+       ktime_t read_latency_sq_sum;
+       ktime_t read_latency_min;
+       ktime_t read_latency_max;
+
+       spinlock_t write_latency_lock;
+       u64 total_writes;
+       ktime_t write_latency_sum;
+       ktime_t write_latency_sq_sum;
+       ktime_t write_latency_min;
+       ktime_t write_latency_max;
 };
 
 extern int ceph_metric_init(struct ceph_client_metric *m);
@@ -27,4 +42,11 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
 {
        percpu_counter_inc(&m->i_caps_mis);
 }
+
+extern void ceph_update_read_latency(struct ceph_client_metric *m,
+                                    ktime_t r_start, ktime_t r_end,
+                                    int rc);
+extern void ceph_update_write_latency(struct ceph_client_metric *m,
+                                     ktime_t r_start, ktime_t r_end,
+                                     int rc);
 #endif /* _FS_CEPH_MDS_METRIC_H */
index 9d9f745b98a10015a8b6caf42578bc1ebff3a261..734f7c6a9f5695764b9dcd2110a530566d3b2bf2 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/mempool.h>
 #include <linux/rbtree.h>
 #include <linux/refcount.h>
+#include <linux/ktime.h>
 
 #include <linux/ceph/types.h>
 #include <linux/ceph/osdmap.h>
@@ -213,6 +214,8 @@ struct ceph_osd_request {
        /* internal */
        unsigned long r_stamp;                /* jiffies, send or check time */
        unsigned long r_start_stamp;          /* jiffies */
+       ktime_t r_start_latency;              /* ktime_t */
+       ktime_t r_end_latency;                /* ktime_t */
        int r_attempts;
        u32 r_map_dne_bound;
 
index 1d4973f8cd7a69efe0763ba18990f96a2108893a..ece124a5138e3e408bbcfad06ec7027f2ce91f4c 100644 (file)
@@ -2373,6 +2373,7 @@ static void account_request(struct ceph_osd_request *req)
        atomic_inc(&req->r_osdc->num_requests);
 
        req->r_start_stamp = jiffies;
+       req->r_start_latency = ktime_get();
 }
 
 static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -2389,6 +2390,8 @@ static void finish_request(struct ceph_osd_request *req)
        WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
        dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
 
+       req->r_end_latency = ktime_get();
+
        if (req->r_osd)
                unlink_request(req->r_osd, req);
        atomic_dec(&osdc->num_requests);