block: add iostat counters for flush requests
authorKonstantin Khlebnikov <khlebnikov@yandex-team.ru>
Thu, 21 Nov 2019 10:40:26 +0000 (13:40 +0300)
committerJens Axboe <axboe@kernel.dk>
Thu, 21 Nov 2019 16:06:47 +0000 (09:06 -0700)
Requests that trigger flushing of the volatile writeback cache to disk
(barriers) have a significant effect on overall performance.

The block layer has a sophisticated engine for combining several flush
requests into one, but there are no statistics for the actual flushes executed
by the disk. Requests which trigger flushes are usually barriers - zero-size writes.

This patch adds two iostat counters into /sys/class/block/$dev/stat and
/proc/diskstats - count of completed flush requests and their total time.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Documentation/ABI/testing/procfs-diskstats
Documentation/ABI/testing/sysfs-block
Documentation/admin-guide/iostats.rst
Documentation/block/stat.rst
block/blk-flush.c
block/genhd.c
block/partition-generic.c
include/linux/blk_types.h

index 2c44b4f1b060b10b73c3fb19ee8461abf5ea6b63..70dcaf2481f49c99e4e5b23eee5a0f7b671e8369 100644 (file)
@@ -29,4 +29,9 @@ Description:
                17 - sectors discarded
                18 - time spent discarding
 
+               Kernel 5.5+ appends two more fields for flush requests:
+
+               19 - flush requests completed successfully
+               20 - time spent flushing
+
                For more details refer to Documentation/admin-guide/iostats.rst
index f8c7c7126bb1a47e4b42347d71897cb2288290e2..ed8c14f161ee3cbc2bb21760ccd1ff9b9ea4bb9a 100644 (file)
@@ -15,6 +15,12 @@ Description:
                 9 - I/Os currently in progress
                10 - time spent doing I/Os (ms)
                11 - weighted time spent doing I/Os (ms)
+               12 - discards completed
+               13 - discards merged
+               14 - sectors discarded
+               15 - time spent discarding (ms)
+               16 - flush requests completed
+               17 - time spent flushing (ms)
                For more details refer Documentation/admin-guide/iostats.rst
 
 
index 5d63b18bd6d1f626cfefcceb76727df2dcb5f38d..4f0462af3ca78f393271cf11d67065e0f86da762 100644 (file)
@@ -121,6 +121,15 @@ Field 15 -- # of milliseconds spent discarding
     This is the total number of milliseconds spent by all discards (as
     measured from __make_request() to end_that_request_last()).
 
+Field 16 -- # of flush requests completed
+    This is the total number of flush requests completed successfully.
+
+    The block layer combines flush requests and executes at most one at a
+    time. This counts flush requests executed by the disk. Not tracked for partitions.
+
+Field 17 -- # of milliseconds spent flushing
+    This is the total number of milliseconds spent by all flush requests.
+
 To avoid introducing performance bottlenecks, no locks are held while
 modifying these counters.  This implies that minor inaccuracies may be
 introduced when changes collide, so (for instance) adding up all the
index 9c07bc22b0bc4dd2305888a386292ff000554c46..77311335c08bad8687f5b81235f8f7e72b0b76b7 100644 (file)
@@ -41,6 +41,8 @@ discard I/Os    requests      number of discard I/Os processed
 discard merges  requests      number of discard I/Os merged with in-queue I/O
 discard sectors sectors       number of sectors discarded
 discard ticks   milliseconds  total wait time for discard requests
+flush I/Os      requests      number of flush I/Os processed
+flush ticks     milliseconds  total wait time for flush requests
 =============== ============= =================================================
 
 read I/Os, write I/Os, discard I/0s
@@ -48,6 +50,14 @@ read I/Os, write I/Os, discard I/0s
 
 These values increment when an I/O request completes.
 
+flush I/Os
+==========
+
+These values increment when a flush I/O request completes.
+
+The block layer combines flush requests and executes at most one at a time.
+This counts flush requests executed by the disk. Not tracked for partitions.
+
 read merges, write merges, discard merges
 =========================================
 
@@ -62,8 +72,8 @@ discarded from this block device.  The "sectors" in question are the
 standard UNIX 512-byte sectors, not any device- or filesystem-specific
 block size.  The counters are incremented when the I/O completes.
 
-read ticks, write ticks, discard ticks
-======================================
+read ticks, write ticks, discard ticks, flush ticks
+===================================================
 
 These values count the number of milliseconds that I/O requests have
 waited on this block device.  If there are multiple I/O requests waiting,
index 1eec9cbe5a0a1b56cc81c949eb1f275b6c6a58de..1777346baf06f23d6c4411b77bcd12e573eea3b6 100644 (file)
@@ -136,6 +136,17 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
        blk_mq_add_to_requeue_list(rq, add_front, true);
 }
 
+static void blk_account_io_flush(struct request *rq)
+{      /* Add one flush and its elapsed time to the STAT_FLUSH counters of rq's disk. */
+       struct hd_struct *part = &rq->rq_disk->part0; /* always part0 of the disk, never a partition */
+
+       part_stat_lock();
+       part_stat_inc(part, ios[STAT_FLUSH]); /* bump the flush request count */
+       part_stat_add(part, nsecs[STAT_FLUSH],
+                     ktime_get_ns() - rq->start_time_ns); /* nanoseconds from rq->start_time_ns until now */
+       part_stat_unlock();
+}
+
 /**
  * blk_flush_complete_seq - complete flush sequence
  * @rq: PREFLUSH/FUA request being sequenced
@@ -185,7 +196,7 @@ static void blk_flush_complete_seq(struct request *rq,
 
        case REQ_FSEQ_DONE:
                /*
-                * @rq was previously adjusted by blk_flush_issue() for
+                * @rq was previously adjusted by blk_insert_flush() for
                 * flush sequencing and may already have gone through the
                 * flush data request completion path.  Restore @rq for
                 * normal completion and end it.
@@ -212,6 +223,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
        struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
        struct blk_mq_hw_ctx *hctx;
 
+       blk_account_io_flush(flush_rq);
+
        /* release the tag's ownership to the req cloned from */
        spin_lock_irqsave(&fq->mq_flush_lock, flags);
 
index 26b31fcae217fd3d936ccb32c6d7847ff5654805..ff6268970ddc069f84b3977069cca72ba10627c6 100644 (file)
@@ -1385,7 +1385,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           "%lu %lu %lu %u "
                           "%lu %lu %lu %u "
                           "%u %u %u "
-                          "%lu %lu %lu %u\n",
+                          "%lu %lu %lu %u "
+                          "%lu %u"
+                          "\n",
                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
                           disk_name(gp, hd->partno, buf),
                           part_stat_read(hd, ios[STAT_READ]),
@@ -1402,7 +1404,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
                           part_stat_read(hd, ios[STAT_DISCARD]),
                           part_stat_read(hd, merges[STAT_DISCARD]),
                           part_stat_read(hd, sectors[STAT_DISCARD]),
-                          (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
+                          (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
+                          part_stat_read(hd, ios[STAT_FLUSH]),
+                          (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
                        );
        }
        disk_part_iter_exit(&piter);
index aee643ce13d15cf6888589015222836f3550b5db..3db8b73a96b15b7bcc78a59da4d02d5000ed6c6f 100644 (file)
@@ -127,7 +127,8 @@ ssize_t part_stat_show(struct device *dev,
                "%8lu %8lu %8llu %8u "
                "%8lu %8lu %8llu %8u "
                "%8u %8u %8u "
-               "%8lu %8lu %8llu %8u"
+               "%8lu %8lu %8llu %8u "
+               "%8lu %8u"
                "\n",
                part_stat_read(p, ios[STAT_READ]),
                part_stat_read(p, merges[STAT_READ]),
@@ -143,7 +144,9 @@ ssize_t part_stat_show(struct device *dev,
                part_stat_read(p, ios[STAT_DISCARD]),
                part_stat_read(p, merges[STAT_DISCARD]),
                (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
-               (unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
+               (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
+               part_stat_read(p, ios[STAT_FLUSH]),
+               (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
index 23a2fd534817c04ca2d9e7ff53ee3b04a0434d27..70254ae117690c40fa40692bbf790c13cfcdacdf 100644 (file)
@@ -377,6 +377,7 @@ enum stat_group {
        STAT_READ,
        STAT_WRITE,
        STAT_DISCARD,
+       STAT_FLUSH,
 
        NR_STAT_GROUPS
 };