blkparse: Handle cgroup information
author Jan Kara <jack@suse.cz>
Wed, 6 May 2020 13:39:31 +0000 (15:39 +0200)
committer Jens Axboe <axboe@kernel.dk>
Wed, 20 May 2020 13:37:53 +0000 (07:37 -0600)
Since Linux kernel commit 35fe6d763229 "block: use standard blktrace API
to output cgroup info for debug notes" the kernel can pass the
__BLK_TA_CGROUP flag in the action field of generated events. blkparse
does not account for this, so it gets confused by such events and
either ignores them or misreports them. Teach blkparse how to properly
process events with the __BLK_TA_CGROUP flag.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
blkparse.c
blkparse_fmt.c
blktrace_api.h
doc/blkparse.1
doc/blktrace.tex

index 796059bb81786a700e75b46e9938c79337f1350c..40e7bc2486ec79d3f35242575c7c77d37c2a8726 100644 (file)
@@ -617,7 +617,7 @@ static void handle_notify(struct blk_io_trace *bit)
        void    *payload = (caddr_t) bit + sizeof(*bit);
        __u32   two32[2];
 
-       switch (bit->action) {
+       switch (bit->action & ~__BLK_TN_CGROUP) {
        case BLK_TN_PROCESS:
                add_ppm_hash(bit->pid, payload);
                break;
@@ -643,16 +643,27 @@ static void handle_notify(struct blk_io_trace *bit)
        case BLK_TN_MESSAGE:
                if (bit->pdu_len > 0) {
                        char msg[bit->pdu_len+1];
+                       int len = bit->pdu_len;
+                       char cgidstr[24];
 
-                       memcpy(msg, (char *)payload, bit->pdu_len);
-                       msg[bit->pdu_len] = '\0';
+                       cgidstr[0] = 0;
+                       if (bit->action & __BLK_TN_CGROUP) {
+                               struct blk_io_cgroup_payload *cgid = payload;
+
+                               sprintf(cgidstr, "%x,%x ", cgid->ino,
+                                       cgid->gen);
+                               payload += sizeof(struct blk_io_cgroup_payload);
+                               len -= sizeof(struct blk_io_cgroup_payload);
+                       }
+                       memcpy(msg, (char *)payload, len);
+                       msg[len] = '\0';
 
                        fprintf(ofp,
-                               "%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+                               "%3d,%-3d %2d %8s %5d.%09lu %5u %s%2s %3s %s\n",
                                MAJOR(bit->device), MINOR(bit->device),
-                               bit->cpu, "0", (int) SECONDS(bit->time),
-                               (unsigned long) NANO_SECONDS(bit->time),
-                               0, "m", "N", msg);
+                               bit->cpu, "0", (int)SECONDS(bit->time),
+                               (unsigned long)NANO_SECONDS(bit->time),
+                               0, cgidstr, "m", "N", msg);
                }
                break;
 
@@ -1600,7 +1611,7 @@ static void dump_trace_pc(struct blk_io_trace *t, struct per_dev_info *pdi,
                          struct per_cpu_info *pci)
 {
        int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
-       int act = t->action & 0xffff;
+       int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
 
        switch (act) {
                case __BLK_TA_QUEUE:
@@ -1649,7 +1660,7 @@ static void dump_trace_fs(struct blk_io_trace *t, struct per_dev_info *pdi,
                          struct per_cpu_info *pci)
 {
        int w = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
-       int act = t->action & 0xffff;
+       int act = (t->action & 0xffff) & ~__BLK_TA_CGROUP;
 
        switch (act) {
                case __BLK_TA_QUEUE:
@@ -1734,7 +1745,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
                       struct per_dev_info *pdi)
 {
        if (text_output) {
-               if (t->action == BLK_TN_MESSAGE)
+               if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE)
                        handle_notify(t);
                else if (t->action & BLK_TC_ACT(BLK_TC_PC))
                        dump_trace_pc(t, pdi, pci);
@@ -1749,7 +1760,7 @@ static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
 
        if (bin_output_msgs ||
                            !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
-                             t->action == BLK_TN_MESSAGE))
+                             (t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
                output_binary(t, sizeof(*t) + t->pdu_len);
 }
 
@@ -2325,7 +2336,7 @@ static void show_entries_rb(int force)
                        break;
                }
 
-               if (!(bit->action == BLK_TN_MESSAGE) &&
+               if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) &&
                    check_sequence(pdi, t, force))
                        break;
 
@@ -2337,7 +2348,7 @@ static void show_entries_rb(int force)
                if (!pci || pci->cpu != bit->cpu)
                        pci = get_cpu_info(pdi, bit->cpu);
 
-               if (!(bit->action == BLK_TN_MESSAGE))
+               if (!((bit->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE))
                        pci->last_sequence = bit->sequence;
 
                pci->nelems++;
@@ -2471,7 +2482,7 @@ static int read_events(int fd, int always_block, int *fdblock)
                /*
                 * not a real trace, so grab and handle it here
                 */
-               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
                        handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        continue;
@@ -2620,7 +2631,7 @@ static int ms_prime(struct ms_stream *msp)
                        continue;
                }
 
-               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && bit->action != BLK_TN_MESSAGE) {
+               if (bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) && (bit->action & ~__BLK_TN_CGROUP) != BLK_TN_MESSAGE) {
                        handle_notify(bit);
                        output_binary(bit, sizeof(*bit) + bit->pdu_len);
                        bit_free(bit);
index c42e6d7b7219d532c4ae34d937e0ea9660535426..df2f6ce2148a473e20f5cd9afd381f0021bfd42d 100644 (file)
@@ -205,6 +205,21 @@ static void print_field(char *act, struct per_cpu_info *pci,
        case 'e':
                fprintf(ofp, strcat(format, "d"), t->error);
                break;
+       case 'g': {
+               char cgidstr[24];
+               u32 ino = 0, gen = 0;
+
+               if (t->action & __BLK_TA_CGROUP) {
+                       struct blk_io_cgroup_payload *cgid =
+                               (struct blk_io_cgroup_payload *)pdu_buf;
+
+                       ino = cgid->ino;
+                       gen = cgid->gen;
+               }
+               sprintf(cgidstr, "%x,%x", ino, gen);
+               fprintf(ofp, strcat(format, "s"), cgidstr);
+               break;
+       }
        case 'M':
                fprintf(ofp, strcat(format, "d"), MAJOR(t->device));
                break;
index b22221828f41c4509dacc2753cd664bf0120797e..8c760b8dd2600eaf87245bbc292af04ba00bf8db 100644 (file)
@@ -51,6 +51,7 @@ enum {
        __BLK_TA_REMAP,                 /* bio was remapped */
        __BLK_TA_ABORT,                 /* request aborted */
        __BLK_TA_DRV_DATA,              /* binary driver data */
+       __BLK_TA_CGROUP = 1 << 8,
 };
 
 /*
@@ -60,6 +61,7 @@ enum blktrace_notify {
        __BLK_TN_PROCESS = 0,           /* establish pid/name mapping */
        __BLK_TN_TIMESTAMP,             /* include system clock */
        __BLK_TN_MESSAGE,               /* Character string message */
+       __BLK_TN_CGROUP = __BLK_TA_CGROUP,
 };
 
 /*
@@ -116,6 +118,14 @@ struct blk_io_trace_remap {
        __u64 sector_from;
 };
 
+/*
+ * Payload with originating cgroup info
+ */
+struct blk_io_cgroup_payload {
+       __u32 ino;
+       __u32 gen;
+};
+
 /*
  * User setup structure passed with BLKSTARTTRACE
  */
index e494b6eca2237e79cd682142be462879713ce0be..4c26baffc56c35de343b1b48ca77389ff2d3bb1a 100644 (file)
@@ -332,6 +332,10 @@ the event's device (separated by a comma).
 .IP \fBe\fR 4
 Error value
 
+.IP \fBg\fR 4
+Cgroup identifier of the cgroup that generated the IO. Note that this requires
+appropriate kernel support (kernel version at least 4.14).
+
 .IP \fBm\fR 4
 Minor number of event's device.
 
index 3647c7502142c3a32066c26c3f6f4a072bca8caa..836ac4a35f867019623b11411de50bdc409070dd 100644 (file)
@@ -601,6 +601,9 @@ Specifier & \\ \hline\hline
 the event's device \\
          & (separated by a comma). \\ \hline
 \emph{e} & Error value \\ \hline
+\emph{g} & Cgroup identifier of the cgroup that generated the IO. Note that this requires
+appropriate \\
+         & kernel support (kernel version at least 4.14). \\ \hline
 \emph{m} & Minor number of event's device. \\ \hline
 \emph{M} & Major number of event's device. \\ \hline
 \emph{n} & Number of blocks \\ \hline