Merge tag 'md-3.6' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Aug 2012 18:34:40 +0000 (11:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Aug 2012 18:34:40 +0000 (11:34 -0700)
Pull additional md update from NeilBrown:
 "This contains a few patches that depend on plugging changes in the
  block layer so needed to wait for those.

  It also contains a Kconfig fix for the new RAID10 support in dm-raid."

* tag 'md-3.6' of git://neil.brown.name/md:
  md/dm-raid: DM_RAID should select MD_RAID10
  md/raid1: submit IO from originating thread instead of md thread.
  raid5: raid5d handle stripe in batch way
  raid5: make_request use batch stripe release
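
The raid1 and raid5 changes below build on the per-task plugging hook (blk_check_plugged() and struct blk_plug_cb) that the block layer grew for 3.6, which is the dependency the pull message refers to. The fragment here is only a minimal sketch of how a driver uses that hook, not code from this pull: the example_* identifiers and the driver_data argument are hypothetical placeholders, while blk_check_plugged(), struct blk_plug_cb, the bio_list helpers and generic_make_request() are the real interfaces the patches rely on.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

/* Driver-private batch, wrapped around the generic plug callback. */
struct example_plug_cb {
	struct blk_plug_cb	cb;		/* must be the first member so the
						 * blk_plug_cb returned by
						 * blk_check_plugged() is the start
						 * of this allocation */
	struct bio_list		pending;	/* the zeroed allocation from
						 * blk_check_plugged() is already a
						 * valid empty bio_list */
};

/*
 * Invoked when the submitting task's blk_plug is flushed, either by
 * blk_finish_plug() or implicitly when the task schedules
 * (from_schedule == true).
 */
static void example_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct example_plug_cb *plug =
		container_of(cb, struct example_plug_cb, cb);
	struct bio *bio;

	/* Issue everything that was batched while the plug was held. */
	while ((bio = bio_list_pop(&plug->pending)) != NULL)
		generic_make_request(bio);

	kfree(plug);		/* the callback owns the allocation */
}

static void example_submit(struct bio *bio, void *driver_data)
{
	/* Returns NULL when the current task holds no blk_plug. */
	struct blk_plug_cb *cb = blk_check_plugged(example_unplug, driver_data,
						   sizeof(struct example_plug_cb));

	if (cb) {
		struct example_plug_cb *plug =
			container_of(cb, struct example_plug_cb, cb);
		bio_list_add(&plug->pending, bio);	/* defer until unplug */
	} else {
		generic_make_request(bio);		/* no plug: submit now */
	}
}

Unlike this sketch, the raid1 patch does not submit I/O directly when from_schedule is set: in that case raid1_unplug moves the pending list back onto conf->pending_bio_list and wakes the md thread, since the callback may run from the scheduler where direct submission is not appropriate.
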

drivers/md/Kconfig
drivers/md/bitmap.c
drivers/md/raid1.c
drivers/md/raid5.c
drivers/md/raid5.h

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1eee45b69b71617f9108a88b10085a56bef42c3c..d949b781f6f8b3df59e3952936e3cec7c5d769da 100644
@@ -268,13 +268,14 @@ config DM_MIRROR
          needed for live data migration tools such as 'pvmove'.
 
 config DM_RAID
-       tristate "RAID 1/4/5/6 target"
+       tristate "RAID 1/4/5/6/10 target"
        depends on BLK_DEV_DM
        select MD_RAID1
+       select MD_RAID10
        select MD_RAID456
        select BLK_DEV_MD
        ---help---
-        A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
+        A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
 
         A RAID-5 set of N drives with a capacity of C MB per drive provides
         the capacity of C * (N - 1) MB, and protects against a failure
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 15dbe03117e473da9ce6166b6978b7f496b7bc2a..94e7f6ba2e11e1c4479d82b8e840f38cb89e0f0b 100644
@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
                        prepare_to_wait(&bitmap->overflow_wait, &__wait,
                                        TASK_UNINTERRUPTIBLE);
                        spin_unlock_irq(&bitmap->counts.lock);
-                       io_schedule();
+                       schedule();
                        finish_wait(&bitmap->overflow_wait, &__wait);
                        continue;
                }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 9f7f8bee84423f1a7dd35cc33bda9874407af06e..611b5f79761826f8843ede2384c47ccbd0a87be1 100644
@@ -944,6 +944,44 @@ do_sync_io:
        pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 }
 
+struct raid1_plug_cb {
+       struct blk_plug_cb      cb;
+       struct bio_list         pending;
+       int                     pending_cnt;
+};
+
+static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
+{
+       struct raid1_plug_cb *plug = container_of(cb, struct raid1_plug_cb,
+                                                 cb);
+       struct mddev *mddev = plug->cb.data;
+       struct r1conf *conf = mddev->private;
+       struct bio *bio;
+
+       if (from_schedule) {
+               spin_lock_irq(&conf->device_lock);
+               bio_list_merge(&conf->pending_bio_list, &plug->pending);
+               conf->pending_count += plug->pending_cnt;
+               spin_unlock_irq(&conf->device_lock);
+               md_wakeup_thread(mddev->thread);
+               kfree(plug);
+               return;
+       }
+
+       /* we aren't scheduling, so we can do the write-out directly. */
+       bio = bio_list_get(&plug->pending);
+       bitmap_unplug(mddev->bitmap);
+       wake_up(&conf->wait_barrier);
+
+       while (bio) { /* submit pending writes */
+               struct bio *next = bio->bi_next;
+               bio->bi_next = NULL;
+               generic_make_request(bio);
+               bio = next;
+       }
+       kfree(plug);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bio)
 {
        struct r1conf *conf = mddev->private;
@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
        struct md_rdev *blocked_rdev;
+       struct blk_plug_cb *cb;
+       struct raid1_plug_cb *plug = NULL;
        int first_clone;
        int sectors_handled;
        int max_sectors;
@@ -1259,11 +1299,22 @@ read_again:
                mbio->bi_private = r1_bio;
 
                atomic_inc(&r1_bio->remaining);
+
+               cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
+               if (cb)
+                       plug = container_of(cb, struct raid1_plug_cb, cb);
+               else
+                       plug = NULL;
                spin_lock_irqsave(&conf->device_lock, flags);
-               bio_list_add(&conf->pending_bio_list, mbio);
-               conf->pending_count++;
+               if (plug) {
+                       bio_list_add(&plug->pending, mbio);
+                       plug->pending_cnt++;
+               } else {
+                       bio_list_add(&conf->pending_bio_list, mbio);
+                       conf->pending_count++;
+               }
                spin_unlock_irqrestore(&conf->device_lock, flags);
-               if (!mddev_check_plugged(mddev))
+               if (!plug)
                        md_wakeup_thread(mddev->thread);
        }
        /* Mustn't call r1_bio_write_done before this next test,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 87a2d0bdedd1187a695a4d7f25a6d2e5bc2164fe..adda94df5eb2352775e64fb7fae4e88c6e89a98b 100644
@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                } else {
                        if (atomic_read(&sh->count)) {
                                BUG_ON(!list_empty(&sh->lru)
-                                   && !test_bit(STRIPE_EXPANDING, &sh->state));
+                                   && !test_bit(STRIPE_EXPANDING, &sh->state)
+                                   && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
                        } else {
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
        return sh;
 }
 
+struct raid5_plug_cb {
+       struct blk_plug_cb      cb;
+       struct list_head        list;
+};
+
+static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
+{
+       struct raid5_plug_cb *cb = container_of(
+               blk_cb, struct raid5_plug_cb, cb);
+       struct stripe_head *sh;
+       struct mddev *mddev = cb->cb.data;
+       struct r5conf *conf = mddev->private;
+
+       if (cb->list.next && !list_empty(&cb->list)) {
+               spin_lock_irq(&conf->device_lock);
+               while (!list_empty(&cb->list)) {
+                       sh = list_first_entry(&cb->list, struct stripe_head, lru);
+                       list_del_init(&sh->lru);
+                       /*
+                        * avoid race release_stripe_plug() sees
+                        * STRIPE_ON_UNPLUG_LIST clear but the stripe
+                        * is still in our list
+                        */
+                       smp_mb__before_clear_bit();
+                       clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+                       __release_stripe(conf, sh);
+               }
+               spin_unlock_irq(&conf->device_lock);
+       }
+       kfree(cb);
+}
+
+static void release_stripe_plug(struct mddev *mddev,
+                               struct stripe_head *sh)
+{
+       struct blk_plug_cb *blk_cb = blk_check_plugged(
+               raid5_unplug, mddev,
+               sizeof(struct raid5_plug_cb));
+       struct raid5_plug_cb *cb;
+
+       if (!blk_cb) {
+               release_stripe(sh);
+               return;
+       }
+
+       cb = container_of(blk_cb, struct raid5_plug_cb, cb);
+
+       if (cb->list.next == NULL)
+               INIT_LIST_HEAD(&cb->list);
+
+       if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
+               list_add_tail(&sh->lru, &cb->list);
+       else
+               release_stripe(sh);
+}
+
 static void make_request(struct mddev *mddev, struct bio * bi)
 {
        struct r5conf *conf = mddev->private;
@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                        if ((bi->bi_rw & REQ_NOIDLE) &&
                            !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
-                       mddev_check_plugged(mddev);
-                       release_stripe(sh);
+                       release_stripe_plug(mddev, sh);
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -4537,6 +4593,30 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
        return handled;
 }
 
+#define MAX_STRIPE_BATCH 8
+static int handle_active_stripes(struct r5conf *conf)
+{
+       struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
+       int i, batch_size = 0;
+
+       while (batch_size < MAX_STRIPE_BATCH &&
+                       (sh = __get_priority_stripe(conf)) != NULL)
+               batch[batch_size++] = sh;
+
+       if (batch_size == 0)
+               return batch_size;
+       spin_unlock_irq(&conf->device_lock);
+
+       for (i = 0; i < batch_size; i++)
+               handle_stripe(batch[i]);
+
+       cond_resched();
+
+       spin_lock_irq(&conf->device_lock);
+       for (i = 0; i < batch_size; i++)
+               __release_stripe(conf, batch[i]);
+       return batch_size;
+}
 
 /*
  * This is our raid5 kernel thread.
@@ -4547,7 +4627,6 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
  */
 static void raid5d(struct mddev *mddev)
 {
-       struct stripe_head *sh;
        struct r5conf *conf = mddev->private;
        int handled;
        struct blk_plug plug;
@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
        spin_lock_irq(&conf->device_lock);
        while (1) {
                struct bio *bio;
+               int batch_size;
 
                if (
                    !list_empty(&conf->bitmap_list)) {
@@ -4584,21 +4664,16 @@ static void raid5d(struct mddev *mddev)
                        handled++;
                }
 
-               sh = __get_priority_stripe(conf);
-
-               if (!sh)
+               batch_size = handle_active_stripes(conf);
+               if (!batch_size)
                        break;
-               spin_unlock_irq(&conf->device_lock);
-               
-               handled++;
-               handle_stripe(sh);
-               release_stripe(sh);
-               cond_resched();
+               handled += batch_size;
 
-               if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+               if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
+                       spin_unlock_irq(&conf->device_lock);
                        md_check_recovery(mddev);
-
-               spin_lock_irq(&conf->device_lock);
+                       spin_lock_irq(&conf->device_lock);
+               }
        }
        pr_debug("%d stripes handled\n", handled);
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 61dbb615c30b0f7174d834c01c316a910dda212a..a9fc24901edad817b599219e9fb4c596c4c9e29b 100644
@@ -321,6 +321,7 @@ enum {
        STRIPE_BIOFILL_RUN,
        STRIPE_COMPUTE_RUN,
        STRIPE_OPS_REQ_PENDING,
+       STRIPE_ON_UNPLUG_LIST,
 };
 
 /*