SUNRPC: Replace jiffies-based metrics with ktime-based metrics
authorChuck Lever <chuck.lever@oracle.com>
Fri, 7 May 2010 17:34:47 +0000 (13:34 -0400)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Fri, 14 May 2010 19:09:33 +0000 (15:09 -0400)
Currently RPC performance metrics that tabulate elapsed time use
jiffies time values.  This is problematic on systems that use slow
jiffies (for instance 100HZ systems built for paravirtualized
environments).  It is also a problem for computing precise latency
statistics for advanced network transports, such as InfiniBand,
that can have round-trip latencies significanly faster than a single
clock tick.

For the RPC client, adopt the high resolution time stamp mechanism
already used by the network layer and blktrace: ktime.

We use ktime format time stamps for all internal computations, and
convert to milliseconds for presentation.  As a result, we need only
addition operations in the performance critical paths; multiply/divide
is required only for presentation.

We could report RTT metrics in microseconds.  In fact the mountstats
format is versioned to accomodate exactly this kind of interface
improvement.

For now, however, we'll stay with millisecond precision for
presentation to maintain backwards compatibility with the handful of
currently deployed user space tools.  At a later point, we'll move to
an API such as BDI_STATS where a finer timestamp precision can be
reported.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
include/linux/sunrpc/metrics.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
net/sunrpc/sched.c
net/sunrpc/stats.c
net/sunrpc/xprt.c

index 77f78e56c4814606066669b2ba45104fafb45b91..b6edbc0ea83dddcdc450fa05a06cc6face7d69b5 100644 (file)
@@ -26,6 +26,7 @@
 #define _LINUX_SUNRPC_METRICS_H
 
 #include <linux/seq_file.h>
+#include <linux/ktime.h>
 
 #define RPC_IOSTATS_VERS       "1.0"
 
@@ -58,9 +59,9 @@ struct rpc_iostats {
         * and the total time the request spent from init to release
         * are measured.
         */
-       unsigned long long      om_queue,       /* jiffies queued for xmit */
-                               om_rtt,         /* jiffies for RPC RTT */
-                               om_execute;     /* jiffies for RPC execution */
+       ktime_t                 om_queue,       /* queued for xmit */
+                               om_rtt,         /* RPC RTT */
+                               om_execute;     /* RPC execution */
 } ____cacheline_aligned;
 
 struct rpc_task;
index 7bc7fd5291ce37d6abcbe7f44e96156540beb46d..76720d262ef22aabfff476d02150e8faca49558f 100644 (file)
@@ -10,6 +10,7 @@
 #define _LINUX_SUNRPC_SCHED_H_
 
 #include <linux/timer.h>
+#include <linux/ktime.h>
 #include <linux/sunrpc/types.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
@@ -80,8 +81,8 @@ struct rpc_task {
 
        unsigned short          tk_timeouts;    /* maj timeouts */
        size_t                  tk_bytes_sent;  /* total bytes sent */
-       unsigned long           tk_start;       /* RPC task init timestamp */
-       long                    tk_rtt;         /* round-trip time (jiffies) */
+       ktime_t                 tk_start,       /* RPC task init timestamp */
+                               tk_rtt;         /* round-trip time */
 
        pid_t                   tk_owner;       /* Process id for batching tasks */
        unsigned char           tk_priority : 2;/* Task priority */
index 04fc342d9840059cd351503c79ddb8ccd046aef9..f8851861b74410bda47fdc02c0ac04e09b0a568d 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
@@ -94,7 +95,7 @@ struct rpc_rqst {
         */
        u32                     rq_bytes_sent;  /* Bytes we have sent */
 
-       unsigned long           rq_xtime;       /* when transmitted */
+       ktime_t                 rq_xtime;       /* transmit time stamp */
        int                     rq_ntrans;
 
 #if defined(CONFIG_NFS_V4_1)
index c8979ce5d88ae3fc7bd5554bc89c5697b1dfc549..aa7b07ef5d55c3a5040511fed9f965b3be6f3070 100644 (file)
@@ -834,7 +834,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
        }
 
        /* starting timestamp */
-       task->tk_start = jiffies;
+       task->tk_start = ktime_get();
 
        dprintk("RPC:       new task initialized, procpid %u\n",
                                task_pid_nr(current));
index 5785d2037f45ea72bd065c1b93a0f22e16ec5526..aacd95f0dce5b5a947ff79e8f4ece63a6e22967a 100644 (file)
@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *task)
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_iostats *stats;
        struct rpc_iostats *op_metrics;
-       long rtt, execute, queue;
+       ktime_t delta;
 
        if (!task->tk_client || !task->tk_client->cl_metrics || !req)
                return;
@@ -159,20 +159,13 @@ void rpc_count_iostats(struct rpc_task *task)
        op_metrics->om_bytes_sent += task->tk_bytes_sent;
        op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
-       queue = (long)req->rq_xtime - task->tk_start;
-       if (queue < 0)
-               queue = -queue;
-       op_metrics->om_queue += queue;
+       delta = ktime_sub(req->rq_xtime, task->tk_start);
+       op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
 
-       rtt = task->tk_rtt;
-       if (rtt < 0)
-               rtt = -rtt;
-       op_metrics->om_rtt += rtt;
+       op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt);
 
-       execute = (long)jiffies - task->tk_start;
-       if (execute < 0)
-               execute = -execute;
-       op_metrics->om_execute += execute;
+       delta = ktime_sub(ktime_get(), task->tk_start);
+       op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
 }
 
 static void _print_name(struct seq_file *seq, unsigned int op,
@@ -186,8 +179,6 @@ static void _print_name(struct seq_file *seq, unsigned int op,
                seq_printf(seq, "\t%12u: ", op);
 }
 
-#define MILLISECS_PER_JIFFY    (1000 / HZ)
-
 void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
        struct rpc_iostats *stats = clnt->cl_metrics;
@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
                                metrics->om_timeouts,
                                metrics->om_bytes_sent,
                                metrics->om_bytes_recv,
-                               metrics->om_queue * MILLISECS_PER_JIFFY,
-                               metrics->om_rtt * MILLISECS_PER_JIFFY,
-                               metrics->om_execute * MILLISECS_PER_JIFFY);
+                               ktime_to_ms(metrics->om_queue),
+                               ktime_to_ms(metrics->om_rtt),
+                               ktime_to_ms(metrics->om_execute));
        }
 }
 EXPORT_SYMBOL_GPL(rpc_print_iostats);
index 698c627122944ce1d4310f904e17f0e1419ae4d8..8986b1b8286206650fe5f0cd55cb013a6f1841cc 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #include <linux/net.h>
+#include <linux/ktime.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
@@ -779,10 +780,11 @@ static void xprt_update_rtt(struct rpc_task *task)
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_rtt *rtt = task->tk_client->cl_rtt;
        unsigned timer = task->tk_msg.rpc_proc->p_timer;
+       long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt));
 
        if (timer) {
                if (req->rq_ntrans == 1)
-                       rpc_update_rtt(rtt, timer, task->tk_rtt);
+                       rpc_update_rtt(rtt, timer, m);
                rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
        }
 }
@@ -803,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
                        task->tk_pid, ntohl(req->rq_xid), copied);
 
        xprt->stat.recvs++;
-       task->tk_rtt = (long)jiffies - req->rq_xtime;
+       task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime);
        if (xprt->ops->timer != NULL)
                xprt_update_rtt(task);
 
@@ -904,7 +906,7 @@ void xprt_transmit(struct rpc_task *task)
                return;
 
        req->rq_connect_cookie = xprt->connect_cookie;
-       req->rq_xtime = jiffies;
+       req->rq_xtime = ktime_get();
        status = xprt->ops->send_request(task);
        if (status != 0) {
                task->tk_status = status;