block: optimise in irq bio put caching
author Pavel Begunkov <asml.silence@gmail.com>
Wed, 7 Feb 2024 14:14:29 +0000 (14:14 +0000)
committer Jens Axboe <axboe@kernel.dk>
Thu, 8 Feb 2024 17:18:48 +0000 (10:18 -0700)
When enlisting a bio into ->free_list_irq we protect the list by
disabling irqs. It's likely they're already disabled and performance of
local_irq_{save,restore}() is decent, but it's not zero cost.

Let's only use the irq cache when we're serving a hard irq, which
allows us to remove local_irq_{save,restore}(), and fall back to
bio_free() in all remaining cases.

Profiles indicate that the bio_put() cost is reduced by ~3.5 times
(1.76% -> 0.49%), and total throughput of a CPU-bound benchmark improves
by around 1% (t/io_uring with high QD and several drives).

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/36d207540b7046c653cc16e5ff08fe7234b19f81.1707314970.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/bio.c

index 8da941974f883c0c2ea0a81b9086556b83ccc205..00847ff1415c322c58b370d85ac528624ad85db5 100644 (file)
@@ -762,30 +762,31 @@ static inline void bio_put_percpu_cache(struct bio *bio)
        struct bio_alloc_cache *cache;
 
        cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
-       if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX) {
-               put_cpu();
-               bio_free(bio);
-               return;
-       }
-
-       bio_uninit(bio);
+       if (READ_ONCE(cache->nr_irq) + cache->nr > ALLOC_CACHE_MAX)
+               goto out_free;
 
        if (in_task()) {
+               bio_uninit(bio);
                bio->bi_next = cache->free_list;
                /* Not necessary but helps not to iopoll already freed bios */
                bio->bi_bdev = NULL;
                cache->free_list = bio;
                cache->nr++;
-       } else {
-               unsigned long flags;
+       } else if (in_hardirq()) {
+               lockdep_assert_irqs_disabled();
 
-               local_irq_save(flags);
+               bio_uninit(bio);
                bio->bi_next = cache->free_list_irq;
                cache->free_list_irq = bio;
                cache->nr_irq++;
-               local_irq_restore(flags);
+       } else {
+               goto out_free;
        }
        put_cpu();
+       return;
+out_free:
+       put_cpu();
+       bio_free(bio);
 }
 
 /**