md/raid1: use bucket based mechanism for IO serialization
author: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Mon, 23 Dec 2019 09:49:01 +0000 (10:49 +0100)
committer: Song Liu <songliubraving@fb.com>
Mon, 13 Jan 2020 19:44:10 +0000 (11:44 -0800)
Since raid1 already uses a bucket-based mechanism to reduce
conflicts between write IO and resync IO, it is possible to
improve the performance of IO serialization by reusing the
same mechanism.

To align with the barrier bucket mechanism, we create arrays
(each with BARRIER_BUCKETS_NR entries) of spinlocks, rb trees
and waitqueues. We can then reduce lock contention with
multiple spinlocks, boost search performance with multiple rb
trees, and also mitigate the thundering herd problem with
multiple waitqueues.

Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
drivers/md/md.c
drivers/md/raid1.c

index 9c4e61c988acf396ec04119b9be4de902c7484c7..4824d50526fabbace9e6f9ad270b65bfbbe734d6 100644 (file)
@@ -130,7 +130,7 @@ static void rdev_uninit_serial(struct md_rdev *rdev)
        if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
                return;
 
-       kfree(rdev->serial);
+       kvfree(rdev->serial);
        rdev->serial = NULL;
 }
 
@@ -144,18 +144,26 @@ static void rdevs_uninit_serial(struct mddev *mddev)
 
 static int rdev_init_serial(struct md_rdev *rdev)
 {
+       /* serial_nums equals with BARRIER_BUCKETS_NR */
+       int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
        struct serial_in_rdev *serial = NULL;
 
        if (test_bit(CollisionCheck, &rdev->flags))
                return 0;
 
-       serial = kmalloc(sizeof(struct serial_in_rdev), GFP_KERNEL);
+       serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
+                         GFP_KERNEL);
        if (!serial)
                return -ENOMEM;
 
-       spin_lock_init(&serial->serial_lock);
-       serial->serial_rb = RB_ROOT_CACHED;
-       init_waitqueue_head(&serial->serial_io_wait);
+       for (i = 0; i < serial_nums; i++) {
+               struct serial_in_rdev *serial_tmp = &serial[i];
+
+               spin_lock_init(&serial_tmp->serial_lock);
+               serial_tmp->serial_rb = RB_ROOT_CACHED;
+               init_waitqueue_head(&serial_tmp->serial_io_wait);
+       }
+
        rdev->serial = serial;
        set_bit(CollisionCheck, &rdev->flags);
 
index 5c6a037474480db9f9e8a4503222fa39d8c7d7a4..48d553d7989a4c9de09a1ace48031e934ab1945d 100644 (file)
@@ -62,7 +62,8 @@ static int check_and_add_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
        unsigned long flags;
        int ret = 0;
        struct mddev *mddev = rdev->mddev;
-       struct serial_in_rdev *serial = rdev->serial;
+       int idx = sector_to_idx(lo);
+       struct serial_in_rdev *serial = &rdev->serial[idx];
 
        si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
 
@@ -87,7 +88,8 @@ static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
        unsigned long flags;
        int found = 0;
        struct mddev *mddev = rdev->mddev;
-       struct serial_in_rdev *serial = rdev->serial;
+       int idx = sector_to_idx(lo);
+       struct serial_in_rdev *serial = &rdev->serial[idx];
 
        spin_lock_irqsave(&serial->serial_lock, flags);
        for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
@@ -1486,7 +1488,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
        for (i = 0; i < disks; i++) {
                struct bio *mbio = NULL;
                struct md_rdev *rdev = conf->mirrors[i].rdev;
-               struct serial_in_rdev *serial = rdev->serial;
+               int idx = sector_to_idx(lo);
+               struct serial_in_rdev *serial = &rdev->serial[idx];
                if (!r1_bio->bios[i])
                        continue;