md: don't fail action_store() if sync_thread is not registered
authorYu Kuai <yukuai3@huawei.com>
Tue, 11 Jun 2024 13:22:46 +0000 (21:22 +0800)
committerSong Liu <song@kernel.org>
Wed, 12 Jun 2024 16:27:50 +0000 (16:27 +0000)
MD_RECOVERY_RUNNING will always be set when trying to register a new
sync_thread, however, if md_start_sync() turns out to do nothing,
MD_RECOVERY_RUNNING will be cleared in this case. And during the race
window, action_store() will return -EBUSY, which will cause some
mdadm tests to fail. For example:

The test 07reshape5intr will add a new disk to array, then start
reshape:

mdadm /dev/md0 --add /dev/xxx
mdadm --grow /dev/md0 -n 3

And add_bound_rdev() from mdadm --add will set MD_RECOVERY_NEEDED,
then during the race windown, mdadm --grow will fail.

Fix the problem by waiting in action_store() during the race window,
fail only if sync_thread is registered.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240611132251.1967786-8-yukuai1@huaweicloud.com
drivers/md/md.c
drivers/md/md.h

index 44cb18ec1c52d921b3d846eb36dc4a5527652563..86abd0fe0681ac7a3d5fda6552608602b2310946 100644 (file)
@@ -753,7 +753,6 @@ int mddev_init(struct mddev *mddev)
 
        mutex_init(&mddev->open_mutex);
        mutex_init(&mddev->reconfig_mutex);
-       mutex_init(&mddev->sync_mutex);
        mutex_init(&mddev->suspend_mutex);
        mutex_init(&mddev->bitmap_info.mutex);
        INIT_LIST_HEAD(&mddev->disks);
@@ -5021,34 +5020,6 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
 
-static void idle_sync_thread(struct mddev *mddev)
-{
-       mutex_lock(&mddev->sync_mutex);
-       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-       if (mddev_lock(mddev)) {
-               mutex_unlock(&mddev->sync_mutex);
-               return;
-       }
-
-       stop_sync_thread(mddev, false);
-       mutex_unlock(&mddev->sync_mutex);
-}
-
-static void frozen_sync_thread(struct mddev *mddev)
-{
-       mutex_lock(&mddev->sync_mutex);
-       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-       if (mddev_lock(mddev)) {
-               mutex_unlock(&mddev->sync_mutex);
-               return;
-       }
-
-       stop_sync_thread(mddev, false);
-       mutex_unlock(&mddev->sync_mutex);
-}
-
 static int mddev_start_reshape(struct mddev *mddev)
 {
        int ret;
@@ -5056,24 +5027,13 @@ static int mddev_start_reshape(struct mddev *mddev)
        if (mddev->pers->start_reshape == NULL)
                return -EINVAL;
 
-       ret = mddev_lock(mddev);
-       if (ret)
-               return ret;
-
-       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
-               mddev_unlock(mddev);
-               return -EBUSY;
-       }
-
        if (mddev->reshape_position == MaxSector ||
            mddev->pers->check_reshape == NULL ||
            mddev->pers->check_reshape(mddev)) {
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                ret = mddev->pers->start_reshape(mddev);
-               if (ret) {
-                       mddev_unlock(mddev);
+               if (ret)
                        return ret;
-               }
        } else {
                /*
                 * If reshape is still in progress, and md_check_recovery() can
@@ -5083,7 +5043,6 @@ static int mddev_start_reshape(struct mddev *mddev)
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        }
 
-       mddev_unlock(mddev);
        sysfs_notify_dirent_safe(mddev->sysfs_degraded);
        return 0;
 }
@@ -5097,36 +5056,53 @@ action_store(struct mddev *mddev, const char *page, size_t len)
        if (!mddev->pers || !mddev->pers->sync_request)
                return -EINVAL;
 
+retry:
+       if (work_busy(&mddev->sync_work))
+               flush_work(&mddev->sync_work);
+
+       ret = mddev_lock(mddev);
+       if (ret)
+               return ret;
+
+       if (work_busy(&mddev->sync_work)) {
+               mddev_unlock(mddev);
+               goto retry;
+       }
+
        action = md_sync_action_by_name(page);
 
        /* TODO: mdadm rely on "idle" to start sync_thread. */
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
                switch (action) {
                case ACTION_FROZEN:
-                       frozen_sync_thread(mddev);
-                       return len;
+                       md_frozen_sync_thread(mddev);
+                       ret = len;
+                       goto out;
                case ACTION_IDLE:
-                       idle_sync_thread(mddev);
+                       md_idle_sync_thread(mddev);
                        break;
                case ACTION_RESHAPE:
                case ACTION_RECOVER:
                case ACTION_CHECK:
                case ACTION_REPAIR:
                case ACTION_RESYNC:
-                       return -EBUSY;
+                       ret = -EBUSY;
+                       goto out;
                default:
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto out;
                }
        } else {
                switch (action) {
                case ACTION_FROZEN:
                        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-                       return len;
+                       ret = len;
+                       goto out;
                case ACTION_RESHAPE:
                        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                        ret = mddev_start_reshape(mddev);
                        if (ret)
-                               return ret;
+                               goto out;
                        break;
                case ACTION_RECOVER:
                        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -5144,7 +5120,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                        break;
                default:
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto out;
                }
        }
 
@@ -5152,14 +5129,18 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                /* A write to sync_action is enough to justify
                 * canceling read-auto mode
                 */
-               flush_work(&mddev->sync_work);
                mddev->ro = MD_RDWR;
                md_wakeup_thread(mddev->sync_thread);
        }
+
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        sysfs_notify_dirent_safe(mddev->sysfs_action);
-       return len;
+       ret = len;
+
+out:
+       mddev_unlock(mddev);
+       return ret;
 }
 
 static struct md_sysfs_entry md_scan_mode =
index 88add162b08ea0a50f63783f80cb1a1bbd539df7..732053b905b229df3abd40521274c6f688a1a89f 100644 (file)
@@ -595,8 +595,6 @@ struct mddev {
         */
        struct list_head                deleting;
 
-       /* Used to synchronize idle and frozen for action_store() */
-       struct mutex                    sync_mutex;
        /* The sequence number for sync thread */
        atomic_t sync_seq;