Merge tag 'block-6.10-20240523' of git://git.kernel.dk/linux
[linux-block.git] / block / blk-cgroup.c
index 4b1a35ab0ea4c9c73a910a8c7454bb9ca2229881..37e6cc91d576bf956854635126a0ce02a858b995 100644 (file)
@@ -322,6 +322,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
        blkg->q = disk->queue;
        INIT_LIST_HEAD(&blkg->q_node);
        blkg->blkcg = blkcg;
+       blkg->iostat.blkg = blkg;
 #ifdef CONFIG_BLK_CGROUP_PUNT_BIO
        spin_lock_init(&blkg->async_bio_lock);
        bio_list_init(&blkg->async_bios);
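
The new assignment above mirrors the back-pointer the per-cpu stat sets already carry, so a flusher that pulls the blkg-wide `iostat` set off a per-cpu lockless list can map the node back to its owning blkg. A minimal consumer-side sketch, abridged from __blkcg_rstat_flush() later in this diff:

	struct llist_head *lhead = per_cpu_ptr(blkcg->lhead, cpu);
	struct llist_node *lnode = llist_del_all(lhead);
	struct blkg_iostat_set *bisc, *next_bisc;

	llist_for_each_entry_safe(bisc, next_bisc, lnode, lnode) {
		/* Back-pointer set at allocation time; with this hunk,
		 * bisc may now also be a blkg's own &blkg->iostat. */
		struct blkcg_gq *blkg = bisc->blkg;

		/* ... flush bisc->cur into blkg->iostat ... */
	}
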
@@ -618,12 +619,45 @@ restart:
        spin_unlock_irq(&q->queue_lock);
 }
 
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+       int i;
+
+       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+               dst->bytes[i] = src->bytes[i];
+               dst->ios[i] = src->ios[i];
+       }
+}
+
+static void __blkg_clear_stat(struct blkg_iostat_set *bis)
+{
+       struct blkg_iostat cur = {0};
+       unsigned long flags;
+
+       flags = u64_stats_update_begin_irqsave(&bis->sync);
+       blkg_iostat_set(&bis->cur, &cur);
+       blkg_iostat_set(&bis->last, &cur);
+       u64_stats_update_end_irqrestore(&bis->sync, flags);
+}
+
+static void blkg_clear_stat(struct blkcg_gq *blkg)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct blkg_iostat_set *s = per_cpu_ptr(blkg->iostat_cpu, cpu);
+
+               __blkg_clear_stat(s);
+       }
+       __blkg_clear_stat(&blkg->iostat);
+}
+
 static int blkcg_reset_stats(struct cgroup_subsys_state *css,
                             struct cftype *cftype, u64 val)
 {
        struct blkcg *blkcg = css_to_blkcg(css);
        struct blkcg_gq *blkg;
-       int i, cpu;
+       int i;
 
        mutex_lock(&blkcg_pol_mutex);
        spin_lock_irq(&blkcg->lock);
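
Unlike the memset()-based reset it replaces further down, __blkg_clear_stat() zeroes only `cur` and `last`, and does so inside a u64_stats update section, so the seqcount, llist linkage, and back-pointer survive and concurrent readers never see a torn snapshot. The matching reader side is the fetch loop in __blkcg_rstat_flush() below; a minimal sketch of that pattern, with blkg_iostat_read as a hypothetical helper name:

	/* Hypothetical helper: take a consistent snapshot of one stat set.
	 * Mirrors the do/while fetch loop in __blkcg_rstat_flush(). */
	static void blkg_iostat_read(struct blkg_iostat_set *bis,
				     struct blkg_iostat *out)
	{
		unsigned int seq;

		do {
			seq = u64_stats_fetch_begin(&bis->sync);
			blkg_iostat_set(out, &bis->cur);
		} while (u64_stats_fetch_retry(&bis->sync, seq));
	}
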
@@ -634,18 +668,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
         * anyway.  If you get hit by a race, retry.
         */
        hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-               for_each_possible_cpu(cpu) {
-                       struct blkg_iostat_set *bis =
-                               per_cpu_ptr(blkg->iostat_cpu, cpu);
-                       memset(bis, 0, sizeof(*bis));
-
-                       /* Re-initialize the cleared blkg_iostat_set */
-                       u64_stats_init(&bis->sync);
-                       bis->blkg = blkg;
-               }
-               memset(&blkg->iostat, 0, sizeof(blkg->iostat));
-               u64_stats_init(&blkg->iostat.sync);
-
+               blkg_clear_stat(blkg);
                for (i = 0; i < BLKCG_MAX_POLS; i++) {
                        struct blkcg_policy *pol = blkcg_policy[i];
 
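
The dropped memset() wiped more than the counters: it also zeroed the seqcount (hence the old re-init dance) and, since these sets can sit on a per-cpu lockless list, would have clobbered live linkage. For reference, the fields involved, abridged from struct blkg_iostat_set in blk-cgroup.h (member order simplified):

	struct blkg_iostat_set {
		struct u64_stats_sync	sync;		/* must stay initialized */
		struct blkg_iostat	cur;		/* cleared by __blkg_clear_stat() */
		struct blkg_iostat	last;		/* cleared by __blkg_clear_stat() */
		struct llist_node	lnode;		/* may be live on a per-cpu llist */
		int			lqueued;	/* queued in llist */
		struct blkcg_gq		*blkg;		/* back-pointer set at alloc time */
	};
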
@@ -948,16 +971,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(blkg_conf_exit);
 
-static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
-{
-       int i;
-
-       for (i = 0; i < BLKG_IOSTAT_NR; i++) {
-               dst->bytes[i] = src->bytes[i];
-               dst->ios[i] = src->ios[i];
-       }
-}
-
 static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
 {
        int i;
@@ -1023,7 +1036,19 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
                struct blkg_iostat cur;
                unsigned int seq;
 
+               /*
+                * Order the read of `bisc->lnode.next`, from which
+                * llist_for_each_entry_safe assigns `next_bisc`, before the
+                * clearing of `bisc->lqueued`; if re-ordered, `next_bisc` could
+                * pick up a new next pointer added by blk_cgroup_bio_start().
+                *
+                * The paired barrier is implied by llist_add() in blk_cgroup_bio_start().
+                */
+               smp_mb();
+
                WRITE_ONCE(bisc->lqueued, false);
+               if (bisc == &blkg->iostat)
+                       goto propagate_up; /* propagate up to parent only */
 
                /* fetch the current per-cpu values */
                do {
@@ -1033,10 +1058,24 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
 
                blkcg_iostat_update(blkg, &cur, &bisc->last);
 
+propagate_up:
                /* propagate global delta to parent (unless that's root) */
-               if (parent && parent->parent)
+               if (parent && parent->parent) {
                        blkcg_iostat_update(parent, &blkg->iostat.cur,
                                            &blkg->iostat.last);
+                       /*
+                        * Queue parent->iostat on its blkcg's lockless
+                        * list, unless it is already queued, so that the
+                        * delta propagates up to the grandparent in turn.
+                        */
+                       if (!parent->iostat.lqueued) {
+                               struct llist_head *plhead;
+
+                               plhead = per_cpu_ptr(parent->blkcg->lhead, cpu);
+                               llist_add(&parent->iostat.lnode, plhead);
+                               parent->iostat.lqueued = true;
+                       }
+               }
        }
        raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
 out:
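
The smp_mb() added above pairs with the producer side: blk_cgroup_bio_start() publishes the node with llist_add(), whose implied barrier orders the new next pointer before the `lqueued` store. Abridged from that function (bis is the current cpu's blkg_iostat_set, blkcg the bio's blkcg):

	/* Queue this cpu's stat set at most once per flush window.
	 * llist_add() implies the barrier pairing with the smp_mb()
	 * before WRITE_ONCE(bisc->lqueued, false) in the flush loop. */
	if (!READ_ONCE(bis->lqueued)) {
		struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);

		llist_add(&bis->lnode, lhead);
		WRITE_ONCE(bis->lqueued, true);
	}
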