Merge tag 'usb-6.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9f5fe62afff9284918330e187a29fc00218e1d71..847721dc2b2b8160f501bbc06bba2f055b2513de 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -329,8 +329,8 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
 /* init a service_queue, assumes the caller zeroed it */
 static void throtl_service_queue_init(struct throtl_service_queue *sq)
 {
-       INIT_LIST_HEAD(&sq->queued[0]);
-       INIT_LIST_HEAD(&sq->queued[1]);
+       INIT_LIST_HEAD(&sq->queued[READ]);
+       INIT_LIST_HEAD(&sq->queued[WRITE]);
        sq->pending_tree = RB_ROOT_CACHED;
        timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
 }
@@ -420,24 +420,17 @@ static void tg_update_has_rules(struct throtl_grp *tg)
        struct throtl_grp *parent_tg = sq_to_tg(tg->service_queue.parent_sq);
        struct throtl_data *td = tg->td;
        int rw;
-       int has_iops_limit = 0;
 
        for (rw = READ; rw <= WRITE; rw++) {
-               unsigned int iops_limit = tg_iops_limit(tg, rw);
-
-               tg->has_rules[rw] = (parent_tg && parent_tg->has_rules[rw]) ||
+               tg->has_rules_iops[rw] =
+                       (parent_tg && parent_tg->has_rules_iops[rw]) ||
                        (td->limit_valid[td->limit_index] &&
-                        (tg_bps_limit(tg, rw) != U64_MAX ||
-                         iops_limit != UINT_MAX));
-
-               if (iops_limit != UINT_MAX)
-                       has_iops_limit = 1;
+                         tg_iops_limit(tg, rw) != UINT_MAX);
+               tg->has_rules_bps[rw] =
+                       (parent_tg && parent_tg->has_rules_bps[rw]) ||
+                       (td->limit_valid[td->limit_index] &&
+                        (tg_bps_limit(tg, rw) != U64_MAX));
        }
-
-       if (has_iops_limit)
-               tg->flags |= THROTL_TG_HAS_IOPS_LIMIT;
-       else
-               tg->flags &= ~THROTL_TG_HAS_IOPS_LIMIT;
 }
 
 static void throtl_pd_online(struct blkg_policy_data *pd)
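
Splitting has_rules[] into has_rules_bps[] and has_rules_iops[] lets the
submission fast path decide per limit type whether a bio needs to enter
blk-throtl at all, which pairs with the BIO_BPS_THROTTLED rework later in
this diff. A minimal userspace sketch of such a check, assuming the
companion header change gates bps throttling on a "bytes already charged"
flag (the names below are toy stand-ins, not the kernel's):

	#include <stdbool.h>

	enum { READ = 0, WRITE = 1 };

	struct tg_toy {
		bool has_rules_bps[2];
		bool has_rules_iops[2];
	};

	/* iops rules always apply, even on reissue; a bio whose bytes were
	 * already charged must not be bps-throttled a second time */
	static bool should_throttle(const struct tg_toy *tg, int rw,
				    bool bps_already_charged)
	{
		if (tg->has_rules_iops[rw])
			return true;
		if (tg->has_rules_bps[rw] && !bps_already_charged)
			return true;
		return false;
	}

	int main(void)
	{
		struct tg_toy tg = { .has_rules_bps = { true, true } };

		/* a bps-only group skips reissued, already-charged bios */
		return should_throttle(&tg, WRITE, true); /* -> false */
	}
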
@@ -520,7 +513,6 @@ static void throtl_rb_erase(struct rb_node *n,
 {
        rb_erase_cached(n, &parent_sq->pending_tree);
        RB_CLEAR_NODE(n);
-       --parent_sq->nr_pending;
 }
 
 static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
@@ -572,7 +564,11 @@ static void throtl_enqueue_tg(struct throtl_grp *tg)
 static void throtl_dequeue_tg(struct throtl_grp *tg)
 {
        if (tg->flags & THROTL_TG_PENDING) {
-               throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
+               struct throtl_service_queue *parent_sq =
+                       tg->service_queue.parent_sq;
+
+               throtl_rb_erase(&tg->rb_node, parent_sq);
+               --parent_sq->nr_pending;
                tg->flags &= ~THROTL_TG_PENDING;
        }
 }
@@ -639,6 +635,8 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
 {
        tg->bytes_disp[rw] = 0;
        tg->io_disp[rw] = 0;
+       tg->carryover_bytes[rw] = 0;
+       tg->carryover_ios[rw] = 0;
 
        /*
         * Previous slice has expired. We must have trimmed it after last
@@ -656,12 +654,17 @@ static inline void throtl_start_new_slice_with_credit(struct throtl_grp *tg,
                   tg->slice_end[rw], jiffies);
 }
 
-static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
+static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw,
+                                         bool clear_carryover)
 {
        tg->bytes_disp[rw] = 0;
        tg->io_disp[rw] = 0;
        tg->slice_start[rw] = jiffies;
        tg->slice_end[rw] = jiffies + tg->td->throtl_slice;
+       if (clear_carryover) {
+               tg->carryover_bytes[rw] = 0;
+               tg->carryover_ios[rw] = 0;
+       }
 
        throtl_log(&tg->service_queue,
                   "[%c] new slice start=%lu end=%lu jiffies=%lu",
@@ -754,33 +757,20 @@ static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
                   tg->slice_start[rw], tg->slice_end[rw], jiffies);
 }
 
-static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
-                                 u32 iops_limit, unsigned long *wait)
+static unsigned int calculate_io_allowed(u32 iops_limit,
+                                        unsigned long jiffy_elapsed)
 {
-       bool rw = bio_data_dir(bio);
        unsigned int io_allowed;
-       unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
        u64 tmp;
 
-       if (iops_limit == UINT_MAX) {
-               if (wait)
-                       *wait = 0;
-               return true;
-       }
-
-       jiffy_elapsed = jiffies - tg->slice_start[rw];
-
-       /* Round up to the next throttle slice, wait time must be nonzero */
-       jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
-
        /*
-        * jiffy_elapsed_rnd should not be a big value as minimum iops can be
+        * jiffy_elapsed should not be a big value: the minimum iops can be
         * 1, so at most jiffy_elapsed should be the equivalent of 1 second, as
         * we will allow dispatch after 1 second and after that the slice
         * should have been trimmed.
         */
 
-       tmp = (u64)iops_limit * jiffy_elapsed_rnd;
+       tmp = (u64)iops_limit * jiffy_elapsed;
        do_div(tmp, HZ);
 
        if (tmp > UINT_MAX)
@@ -788,6 +778,68 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
        else
                io_allowed = tmp;
 
+       return io_allowed;
+}
+
+static u64 calculate_bytes_allowed(u64 bps_limit, unsigned long jiffy_elapsed)
+{
+       return mul_u64_u64_div_u64(bps_limit, (u64)jiffy_elapsed, (u64)HZ);
+}
+
+static void __tg_update_carryover(struct throtl_grp *tg, bool rw)
+{
+       unsigned long jiffy_elapsed = jiffies - tg->slice_start[rw];
+       u64 bps_limit = tg_bps_limit(tg, rw);
+       u32 iops_limit = tg_iops_limit(tg, rw);
+
+       /*
+        * If the config is updated while bios are still throttled, calculate
+        * and accumulate how many bytes/ios have already been waited for
+        * across the changes. carryover_bytes/ios will then be used to
+        * calculate the new wait time under the new configuration.
+        */
+       if (bps_limit != U64_MAX)
+               tg->carryover_bytes[rw] +=
+                       calculate_bytes_allowed(bps_limit, jiffy_elapsed) -
+                       tg->bytes_disp[rw];
+       if (iops_limit != UINT_MAX)
+               tg->carryover_ios[rw] +=
+                       calculate_io_allowed(iops_limit, jiffy_elapsed) -
+                       tg->io_disp[rw];
+}
+
+static void tg_update_carryover(struct throtl_grp *tg)
+{
+       if (tg->service_queue.nr_queued[READ])
+               __tg_update_carryover(tg, READ);
+       if (tg->service_queue.nr_queued[WRITE])
+               __tg_update_carryover(tg, WRITE);
+
+       /* see comments in struct throtl_grp for meaning of these fields. */
+       throtl_log(&tg->service_queue, "%s: %llu %llu %u %u\n", __func__,
+                  tg->carryover_bytes[READ], tg->carryover_bytes[WRITE],
+                  tg->carryover_ios[READ], tg->carryover_ios[WRITE]);
+}
+
+static bool tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio,
+                                u32 iops_limit, unsigned long *wait)
+{
+       bool rw = bio_data_dir(bio);
+       unsigned int io_allowed;
+       unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+
+       if (iops_limit == UINT_MAX) {
+               if (wait)
+                       *wait = 0;
+               return true;
+       }
+
+       jiffy_elapsed = jiffies - tg->slice_start[rw];
+
+       /* Round up to the next throttle slice; wait time must be nonzero */
+       jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
+       io_allowed = calculate_io_allowed(iops_limit, jiffy_elapsed_rnd) +
+                    tg->carryover_ios[rw];
        if (tg->io_disp[rw] + 1 <= io_allowed) {
                if (wait)
                        *wait = 0;
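
With calculate_io_allowed() factored out, tg_within_iops_limit() becomes
"budget earned this slice, plus any carryover, versus IOs dispatched". A
self-contained sketch of that arithmetic (HZ, the slice length, and the
carryover value are arbitrary choices for illustration):

	#include <stdint.h>
	#include <stdio.h>

	#define HZ 1000UL               /* assumed tick rate */
	#define THROTL_SLICE (HZ / 10)  /* 100 ms slice, arbitrary */

	/* mirrors calculate_io_allowed(): IOs permitted in a window */
	static unsigned int io_allowed(uint32_t iops_limit,
				       unsigned long jiffy_elapsed)
	{
		uint64_t tmp = (uint64_t)iops_limit * jiffy_elapsed / HZ;
		return tmp > UINT32_MAX ? UINT32_MAX : (unsigned int)tmp;
	}

	/* round up as the caller does, so a wait time is never zero */
	static unsigned long roundup_slice(unsigned long j)
	{
		return (j + THROTL_SLICE - 1) / THROTL_SLICE * THROTL_SLICE;
	}

	int main(void)
	{
		unsigned long rnd = roundup_slice(30 + 1); /* 30 ms in -> 100 */
		unsigned int carryover = 5; /* unused budget from old config */

		/* 100 iops * 100/1000 + 5 = 15 IOs may dispatch */
		printf("allowed = %u\n", io_allowed(100, rnd) + carryover);
		return 0;
	}
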
@@ -802,16 +854,16 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
        return false;
 }
 
-static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
-                                u64 bps_limit, unsigned long *wait)
+static bool tg_within_bps_limit(struct throtl_grp *tg, struct bio *bio,
+                               u64 bps_limit, unsigned long *wait)
 {
        bool rw = bio_data_dir(bio);
-       u64 bytes_allowed, extra_bytes, tmp;
+       u64 bytes_allowed, extra_bytes;
        unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
        unsigned int bio_size = throtl_bio_data_size(bio);
 
        /* no need to throttle if this bio's bytes have been accounted */
-       if (bps_limit == U64_MAX || bio_flagged(bio, BIO_THROTTLED)) {
+       if (bps_limit == U64_MAX || bio_flagged(bio, BIO_BPS_THROTTLED)) {
                if (wait)
                        *wait = 0;
                return true;
@@ -824,11 +876,8 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
                jiffy_elapsed_rnd = tg->td->throtl_slice;
 
        jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
-
-       tmp = bps_limit * jiffy_elapsed_rnd;
-       do_div(tmp, HZ);
-       bytes_allowed = tmp;
-
+       bytes_allowed = calculate_bytes_allowed(bps_limit, jiffy_elapsed_rnd) +
+                       tg->carryover_bytes[rw];
        if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
                if (wait)
                        *wait = 0;
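
The bytes side follows the same shape, with one subtlety: the old open-coded
"tmp = bps_limit * jiffy_elapsed_rnd; do_div(tmp, HZ)" computed the product
in 64 bits, so a very large bps_limit could overflow before the divide.
calculate_bytes_allowed() uses mul_u64_u64_div_u64() instead, which keeps a
128-bit intermediate. A sketch with __uint128_t standing in for that helper:

	#include <stdint.h>
	#include <stdio.h>

	#define HZ 1000ULL  /* assumed tick rate */

	/* mirrors calculate_bytes_allowed(): bytes permitted in a window,
	 * with a 128-bit intermediate so the product cannot overflow */
	static uint64_t bytes_allowed(uint64_t bps_limit, uint64_t jiffy_elapsed)
	{
		return (uint64_t)((__uint128_t)bps_limit * jiffy_elapsed / HZ);
	}

	int main(void)
	{
		uint64_t bps = 8ULL << 20;  /* 8 MiB/s limit */
		uint64_t allowed = bytes_allowed(bps, 250); /* 250 ms elapsed */

		/* 8 MiB * 250/1000 = 2 MiB of budget in this window */
		printf("allowed = %llu\n", (unsigned long long)allowed);
		return 0;
	}
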
@@ -889,7 +938,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
         * slice and it should be extended instead.
         */
        if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
-               throtl_start_new_slice(tg, rw);
+               throtl_start_new_slice(tg, rw, true);
        else {
                if (time_before(tg->slice_end[rw],
                    jiffies + tg->td->throtl_slice))
@@ -897,8 +946,8 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
                                jiffies + tg->td->throtl_slice);
        }
 
-       if (tg_with_in_bps_limit(tg, bio, bps_limit, &bps_wait) &&
-           tg_with_in_iops_limit(tg, bio, iops_limit, &iops_wait)) {
+       if (tg_within_bps_limit(tg, bio, bps_limit, &bps_wait) &&
+           tg_within_iops_limit(tg, bio, iops_limit, &iops_wait)) {
                if (wait)
                        *wait = 0;
                return true;
@@ -921,22 +970,13 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
        unsigned int bio_size = throtl_bio_data_size(bio);
 
        /* Charge the bio to the group */
-       if (!bio_flagged(bio, BIO_THROTTLED)) {
+       if (!bio_flagged(bio, BIO_BPS_THROTTLED)) {
                tg->bytes_disp[rw] += bio_size;
                tg->last_bytes_disp[rw] += bio_size;
        }
 
        tg->io_disp[rw]++;
        tg->last_io_disp[rw]++;
-
-       /*
-        * BIO_THROTTLED is used to prevent the same bio to be throttled
-        * more than once as a throttled bio will go through blk-throtl the
-        * second time when it eventually gets issued.  Set it when a bio
-        * is being charged to a tg.
-        */
-       if (!bio_flagged(bio, BIO_THROTTLED))
-               bio_set_flag(bio, BIO_THROTTLED);
 }
 
 /**
@@ -990,9 +1030,9 @@ static void tg_update_disptime(struct throtl_grp *tg)
        disptime = jiffies + min_wait;
 
        /* Update dispatch time */
-       throtl_dequeue_tg(tg);
+       throtl_rb_erase(&tg->rb_node, tg->service_queue.parent_sq);
        tg->disptime = disptime;
-       throtl_enqueue_tg(tg);
+       tg_service_queue_add(tg);
 
        /* see throtl_add_bio_tg() */
        tg->flags &= ~THROTL_TG_WAS_EMPTY;
@@ -1026,6 +1066,7 @@ static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw)
        sq->nr_queued[rw]--;
 
        throtl_charge_bio(tg, bio);
+       bio_set_flag(bio, BIO_BPS_THROTTLED);
 
        /*
         * If our parent is another tg, we just need to transfer @bio to
@@ -1101,13 +1142,13 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
                if (time_before(jiffies, tg->disptime))
                        break;
 
-               throtl_dequeue_tg(tg);
-
                nr_disp += throtl_dispatch_tg(tg);
 
                sq = &tg->service_queue;
-               if (sq->nr_queued[0] || sq->nr_queued[1])
+               if (sq->nr_queued[READ] || sq->nr_queued[WRITE])
                        tg_update_disptime(tg);
+               else
+                       throtl_dequeue_tg(tg);
 
                if (nr_disp >= THROTL_QUANTUM)
                        break;
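
Taken together with the throtl_rb_erase()/throtl_dequeue_tg() hunks above,
this keeps nr_pending meaning "groups currently pending": tg_update_disptime()
now re-sorts a group in the tree without dequeueing it, and a group is only
dequeued once it has nothing left queued. A toy model of that invariant (the
tree is reduced to a node count; all names are invented for the example):

	#include <assert.h>
	#include <stdio.h>

	struct sq_toy { int nr_pending; int tree_nodes; };

	static void rb_erase_toy(struct sq_toy *sq) { sq->tree_nodes--; }
	static void tree_add_toy(struct sq_toy *sq) { sq->tree_nodes++; }

	static void enqueue_tg(struct sq_toy *sq)
	{
		tree_add_toy(sq);
		sq->nr_pending++;
	}

	static void dequeue_tg(struct sq_toy *sq)
	{
		rb_erase_toy(sq);   /* erase no longer touches nr_pending */
		sq->nr_pending--;
	}

	/* re-sort only: both counters must be unchanged afterwards */
	static void update_disptime(struct sq_toy *sq)
	{
		rb_erase_toy(sq);
		tree_add_toy(sq);
	}

	int main(void)
	{
		struct sq_toy sq = { 0, 0 };

		enqueue_tg(&sq);
		update_disptime(&sq);
		assert(sq.nr_pending == 1 && sq.tree_nodes == 1);
		dequeue_tg(&sq);
		assert(sq.nr_pending == 0 && sq.tree_nodes == 0);
		puts("nr_pending stays in sync with the tree");
		return 0;
	}
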
@@ -1321,8 +1362,8 @@ static void tg_conf_updated(struct throtl_grp *tg, bool global)
         * that a group's limits are dropped suddenly and we don't want to
         * account recently dispatched IO with the new low rate.
         */
-       throtl_start_new_slice(tg, READ);
-       throtl_start_new_slice(tg, WRITE);
+       throtl_start_new_slice(tg, READ, false);
+       throtl_start_new_slice(tg, WRITE, false);
 
        if (tg->flags & THROTL_TG_PENDING) {
                tg_update_disptime(tg);
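
Here a fresh slice is started with clear_carryover == false: the budget (or
debt) accumulated by tg_update_carryover() under the old limits must survive
the switch, otherwise bios that already waited under the old configuration
would be re-billed from scratch at the new rate. A toy computation of the
effect, with the numbers chosen arbitrarily:

	#include <stdint.h>
	#include <stdio.h>

	#define HZ 1000UL  /* assumed tick rate */

	static unsigned int io_allowed(uint32_t iops, unsigned long elapsed)
	{
		return (unsigned int)((uint64_t)iops * elapsed / HZ);
	}

	int main(void)
	{
		/* 100 ms under a 1000 iops limit, 60 IOs dispatched:
		 * tg_update_carryover() banks the 40 IOs already earned */
		unsigned int carryover = io_allowed(1000, 100) - 60;

		/* the limit drops to 10 iops and a new slice starts; 50 ms
		 * in, the new rate alone allows 10*50/1000 = 0 IOs, but the
		 * banked budget still lets queued bios go out */
		printf("allowed = %u\n", io_allowed(10, 50) + carryover);
		return 0;
	}
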
@@ -1350,6 +1391,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
                v = U64_MAX;
 
        tg = blkg_to_tg(ctx.blkg);
+       tg_update_carryover(tg);
 
        if (is_u64)
                *(u64 *)((void *)tg + of_cft(of)->private) = v;
@@ -1536,6 +1578,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
                return ret;
 
        tg = blkg_to_tg(ctx.blkg);
+       tg_update_carryover(tg);
 
        v[0] = tg->bps_conf[READ][index];
        v[1] = tg->bps_conf[WRITE][index];
@@ -1673,6 +1716,41 @@ struct blkcg_policy blkcg_policy_throtl = {
        .pd_free_fn             = throtl_pd_free,
 };
 
+void blk_throtl_cancel_bios(struct gendisk *disk)
+{
+       struct request_queue *q = disk->queue;
+       struct cgroup_subsys_state *pos_css;
+       struct blkcg_gq *blkg;
+
+       spin_lock_irq(&q->queue_lock);
+       /*
+        * queue_lock is held, so technically the rcu lock is not needed here.
+        * However, the rcu lock is still held to emphasize that the following
+        * path needs RCU protection and to prevent a warning from lockdep.
+        */
+       rcu_read_lock();
+       blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
+               struct throtl_grp *tg = blkg_to_tg(blkg);
+               struct throtl_service_queue *sq = &tg->service_queue;
+
+               /*
+                * Set the flag to make sure throtl_pending_timer_fn() won't
+                * stop until all throttled bios are dispatched.
+                */
+               blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
+               /*
+                * Update disptime after setting the above flag to make sure
+                * throtl_select_dispatch() won't exit without dispatching.
+                */
+               tg_update_disptime(tg);
+
+               throtl_schedule_pending_timer(sq, jiffies + 1);
+       }
+       rcu_read_unlock();
+       spin_unlock_irq(&q->queue_lock);
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static unsigned long __tg_last_low_overflow_time(struct throtl_grp *tg)
 {
        unsigned long rtime = jiffies, wtime = jiffies;
@@ -1777,39 +1855,6 @@ static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
        return false;
 }
 
-void blk_throtl_cancel_bios(struct request_queue *q)
-{
-       struct cgroup_subsys_state *pos_css;
-       struct blkcg_gq *blkg;
-
-       spin_lock_irq(&q->queue_lock);
-       /*
-        * queue_lock is held, rcu lock is not needed here technically.
-        * However, rcu lock is still held to emphasize that following
-        * path need RCU protection and to prevent warning from lockdep.
-        */
-       rcu_read_lock();
-       blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-               struct throtl_grp *tg = blkg_to_tg(blkg);
-               struct throtl_service_queue *sq = &tg->service_queue;
-
-               /*
-                * Set the flag to make sure throtl_pending_timer_fn() won't
-                * stop until all throttled bios are dispatched.
-                */
-               blkg_to_tg(blkg)->flags |= THROTL_TG_CANCELING;
-               /*
-                * Update disptime after setting the above flag to make sure
-                * throtl_select_dispatch() won't exit without dispatching.
-                */
-               tg_update_disptime(tg);
-
-               throtl_schedule_pending_timer(sq, jiffies + 1);
-       }
-       rcu_read_unlock();
-       spin_unlock_irq(&q->queue_lock);
-}
-
 static bool throtl_can_upgrade(struct throtl_data *td,
        struct throtl_grp *this_tg)
 {
@@ -2005,7 +2050,6 @@ static void blk_throtl_update_idletime(struct throtl_grp *tg)
        tg->checked_last_finish_time = last_finish_time;
 }
 
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 static void throtl_update_latency_buckets(struct throtl_data *td)
 {
        struct avg_latency_bucket avg_latency[2][LATENCY_BUCKET_SIZE];
@@ -2086,6 +2130,28 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
 static inline void throtl_update_latency_buckets(struct throtl_data *td)
 {
 }
+
+static void blk_throtl_update_idletime(struct throtl_grp *tg)
+{
+}
+
+static void throtl_downgrade_check(struct throtl_grp *tg)
+{
+}
+
+static void throtl_upgrade_check(struct throtl_grp *tg)
+{
+}
+
+static bool throtl_can_upgrade(struct throtl_data *td,
+       struct throtl_grp *this_tg)
+{
+       return false;
+}
+
+static void throtl_upgrade_state(struct throtl_data *td)
+{
+}
 #endif
 
 bool __blk_throtl_bio(struct bio *bio)
@@ -2159,8 +2225,10 @@ again:
                qn = &tg->qnode_on_parent[rw];
                sq = sq->parent_sq;
                tg = sq_to_tg(sq);
-               if (!tg)
+               if (!tg) {
+                       bio_set_flag(bio, BIO_BPS_THROTTLED);
                        goto out_unlock;
+               }
        }
 
        /* out-of-limit, queue to @tg */
@@ -2189,8 +2257,6 @@ again:
        }
 
 out_unlock:
-       bio_set_flag(bio, BIO_THROTTLED);
-
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
                bio->bi_issue.value |= BIO_ISSUE_THROTL_SKIP_LATENCY;
@@ -2286,8 +2352,9 @@ void blk_throtl_bio_endio(struct bio *bio)
 }
 #endif
 
-int blk_throtl_init(struct request_queue *q)
+int blk_throtl_init(struct gendisk *disk)
 {
+       struct request_queue *q = disk->queue;
        struct throtl_data *td;
        int ret;
 
@@ -2329,8 +2396,10 @@ int blk_throtl_init(struct request_queue *q)
        return ret;
 }
 
-void blk_throtl_exit(struct request_queue *q)
+void blk_throtl_exit(struct gendisk *disk)
 {
+       struct request_queue *q = disk->queue;
+
        BUG_ON(!q->td);
        del_timer_sync(&q->td->service_queue.pending_timer);
        throtl_shutdown_wq(q);
@@ -2340,8 +2409,9 @@ void blk_throtl_exit(struct request_queue *q)
        kfree(q->td);
 }
 
-void blk_throtl_register_queue(struct request_queue *q)
+void blk_throtl_register(struct gendisk *disk)
 {
+       struct request_queue *q = disk->queue;
        struct throtl_data *td;
        int i;
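
The init/exit/register entry points now take the gendisk and derive the
request_queue internally, so the queue pointer stops leaking into every
prototype. A toy, compilable illustration of that interface-narrowing
pattern (not kernel code; the struct layouts are invented for the example):

	#include <stdio.h>

	struct request_queue_toy { int id; };
	struct gendisk_toy { struct request_queue_toy *queue; };

	/* after the change: take the disk, look up the queue internally */
	static int throtl_init_toy(struct gendisk_toy *disk)
	{
		struct request_queue_toy *q = disk->queue;

		printf("init throttling for queue %d\n", q->id);
		return 0;
	}

	int main(void)
	{
		struct request_queue_toy q = { .id = 7 };
		struct gendisk_toy disk = { .queue = &q };

		return throtl_init_toy(&disk);
	}
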