writeback: support retrieving per group debug writeback stats of bdi
authorKemeng Shi <shikemeng@huaweicloud.com>
Tue, 23 Apr 2024 03:46:40 +0000 (11:46 +0800)
committerAndrew Morton <akpm@linux-foundation.org>
Mon, 6 May 2024 00:53:51 +0000 (17:53 -0700)
Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
of bdi.

The following domain hierarchy is tested:
                global domain (320G)
                /                 \
        cgroup domain1(10G)     cgroup domain2(10G)
                |                 |
bdi            wb1               wb2

/* per wb writeback info of bdi is collected */
cat wb_stats
WbCgIno:                    1
WbWriteback:                0 kB
WbReclaimable:              0 kB
WbDirtyThresh:              0 kB
WbDirtied:                  0 kB
WbWritten:                  0 kB
WbWriteBandwidth:      102400 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  0
b_dirty_time:               0
state:                      1

WbCgIno:                 4091
WbWriteback:             1792 kB
WbReclaimable:         820512 kB
WbDirtyThresh:        6004692 kB
WbDirtied:            1820448 kB
WbWritten:             999488 kB
WbWriteBandwidth:      169020 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  1
b_dirty_time:               0
state:                      5

WbCgIno:                 4131
WbWriteback:             1120 kB
WbReclaimable:         820064 kB
WbDirtyThresh:        6004728 kB
WbDirtied:            1822688 kB
WbWritten:            1002400 kB
WbWriteBandwidth:      153520 kBps
b_dirty:                    0
b_io:                       0
b_more_io:                  1
b_dirty_time:               0
state:                      5

[shikemeng@huaweicloud.com: fix build problems]
Link: https://lkml.kernel.org/r/20240423034643.141219-4-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20240423034643.141219-3-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/writeback.h
mm/backing-dev.c
mm/page-writeback.c

index 9845cb62e40b2d95cfe4a8c07d41ae2f7aad2783..112d806ddbe49d3b1541c1feb39b26d59a10e860 100644 (file)
@@ -355,6 +355,7 @@ int dirtytime_interval_handler(struct ctl_table *table, int write,
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
+unsigned long cgwb_calc_thresh(struct bdi_writeback *wb);
 
 void wb_update_bandwidth(struct bdi_writeback *wb);
 
index 089146feb8301b687fc6317b4abfa4476860af8c..e61bbb1bd62218f8e97640c5018ee5287b8ee4a0 100644 (file)
@@ -155,19 +155,96 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
 }
 DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats);
 
+/*
+ * Emit one bdi_writeback's statistics to the wb_stats seq_file as a
+ * "key: value" record (one record per wb, blank-line separated -- see the
+ * sample output in the commit log).  Page counters collected in @stats are
+ * converted to kB via K() (presumably pages << (PAGE_SHIFT - 10), defined
+ * earlier in this file -- TODO confirm); list lengths and wb->state are
+ * printed raw.
+ *
+ * NOTE(review): stray blank line at the top of the function body below.
+ */
+static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb,
+                         struct wb_stats *stats)
+{
+
+       seq_printf(m,
+                  "WbCgIno:           %10lu\n"
+                  "WbWriteback:       %10lu kB\n"
+                  "WbReclaimable:     %10lu kB\n"
+                  "WbDirtyThresh:     %10lu kB\n"
+                  "WbDirtied:         %10lu kB\n"
+                  "WbWritten:         %10lu kB\n"
+                  "WbWriteBandwidth:  %10lu kBps\n"
+                  "b_dirty:           %10lu\n"
+                  "b_io:              %10lu\n"
+                  "b_more_io:         %10lu\n"
+                  "b_dirty_time:      %10lu\n"
+                  "state:             %10lx\n\n",
+#ifdef CONFIG_CGROUP_WRITEBACK
+                  /* identify the wb by its memcg cgroup's inode number */
+                  cgroup_ino(wb->memcg_css->cgroup),
+#else
+                  /* no cgroup writeback: single wb, report ino 1 (root) */
+                  1ul,
+#endif
+                  K(stats->nr_writeback),
+                  K(stats->nr_reclaimable),
+                  K(stats->wb_thresh),
+                  K(stats->nr_dirtied),
+                  K(stats->nr_written),
+                  K(wb->avg_write_bandwidth),
+                  stats->nr_dirty,
+                  stats->nr_io,
+                  stats->nr_more_io,
+                  stats->nr_dirty_time,
+                  wb->state);
+}
+
+/*
+ * seq_file show handler backing /sys/kernel/debug/bdi/<dev>/wb_stats:
+ * walk every bdi_writeback attached to the bdi and print its stats via
+ * wb_stats_show().  Returns 0 (seq_file convention for success).
+ */
+static int cgwb_debug_stats_show(struct seq_file *m, void *v)
+{
+       struct backing_dev_info *bdi = m->private;
+       unsigned long background_thresh;
+       unsigned long dirty_thresh;
+       struct bdi_writeback *wb;
+
+       /* global dirty limit, shared by every wb's stats collection below */
+       global_dirty_limits(&background_thresh, &dirty_thresh);
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) {
+               struct wb_stats stats = { .dirty_thresh = dirty_thresh };
+
+               /* presumably fails for a wb being torn down -- skip it */
+               if (!wb_tryget(wb))
+                       continue;
+
+               collect_wb_stats(&stats, wb);
+
+               /*
+                * Calculate thresh of wb in writeback cgroup which is min of
+                * thresh in global domain and thresh in cgroup domain. Drop
+                * rcu lock because cgwb_calc_thresh may sleep in
+                * cgroup_rstat_flush. We can do so here because we have a ref.
+                */
+               if (mem_cgroup_wb_domain(wb)) {
+                       rcu_read_unlock();
+                       stats.wb_thresh = min(stats.wb_thresh, cgwb_calc_thresh(wb));
+                       /*
+                        * NOTE(review): the ref keeps wb alive across the
+                        * unlock, but wb may have left wb_list meanwhile;
+                        * presumably acceptable for a debug interface --
+                        * confirm list-removal semantics.
+                        */
+                       rcu_read_lock();
+               }
+
+               wb_stats_show(m, wb, &stats);
+
+               wb_put(wb);
+       }
+       rcu_read_unlock();
+
+       return 0;
+}
+/* generates cgwb_debug_stats_fops (open/read/release) for debugfs */
+DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats);
+
 static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
 {
        bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
 
        debugfs_create_file("stats", 0444, bdi->debug_dir, bdi,
                            &bdi_debug_stats_fops);
+       /* new read-only file exposing per-writeback-cgroup stats of bdi */
+       debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi,
+                           &cgwb_debug_stats_fops);
 }
 
 static void bdi_debug_unregister(struct backing_dev_info *bdi)
 {
        debugfs_remove_recursive(bdi->debug_dir);
 }
-#else
+#else /* CONFIG_DEBUG_FS */
 static inline void bdi_debug_init(void)
 {
 }
@@ -178,7 +255,7 @@ static inline void bdi_debug_register(struct backing_dev_info *bdi,
 static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
 {
 }
-#endif
+#endif /* CONFIG_DEBUG_FS */
 
 static ssize_t read_ahead_kb_store(struct device *dev,
                                  struct device_attribute *attr,
index 2c1f595d1ddc487008cc55c64d9ef7e612c4ef34..c5fb3121469ad515e128555ce2987b4e72e67d50 100644 (file)
@@ -892,6 +892,25 @@ unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh)
        return __wb_calc_thresh(&gdtc);
 }
 
+/*
+ * Calculate @wb's dirty threshold within its memcg (cgroup writeback)
+ * domain, the per-cgroup analogue of wb_calc_thresh() above.  May sleep
+ * (mem_cgroup_wb_stats flushes rstat -- see caller's comment in
+ * mm/backing-dev.c), so callers must not hold RCU/atomic context.
+ */
+unsigned long cgwb_calc_thresh(struct bdi_writeback *wb)
+{
+       struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB };
+       struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) };
+       unsigned long filepages = 0, headroom = 0, writeback = 0;
+
+       /* global domain state feeding the memcg domain's limit calculation */
+       gdtc.avail = global_dirtyable_memory();
+       gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) +
+                    global_node_page_state(NR_WRITEBACK);
+
+       mem_cgroup_wb_stats(wb, &filepages, &headroom,
+                           &mdtc.dirty, &writeback);
+       /* count pages under writeback as dirty, matching gdtc.dirty above */
+       mdtc.dirty += writeback;
+       mdtc_calc_avail(&mdtc, filepages, headroom);
+       domain_dirty_limits(&mdtc);
+
+       return __wb_calc_thresh(&mdtc);
+}
+
 /*
  *                           setpoint - dirty 3
  *        f(dirty) := 1.0 + (----------------)