spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector;
+ new->resync_max = MaxSector;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
mddev->major_version = 0;
mddev->minor_version = sb->minor_version;
mddev->patch_version = sb->patch_version;
- mddev->persistent = ! sb->not_persistent;
+ mddev->external = 0;
mddev->chunk_size = sb->chunk_size;
mddev->ctime = sb->ctime;
mddev->utime = sb->utime;
sb->size = mddev->size;
sb->raid_disks = mddev->raid_disks;
sb->md_minor = mddev->md_minor;
- sb->not_persistent = !mddev->persistent;
+ sb->not_persistent = 0;
sb->utime = mddev->utime;
sb->state = 0;
sb->events_hi = (mddev->events>>32);
if (mddev->raid_disks == 0) {
mddev->major_version = 1;
mddev->patch_version = 0;
- mddev->persistent = 1;
+ mddev->external = 0;
mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
MD_BUG();
mddev->events --;
}
- sync_sbs(mddev, nospares);
/*
* do not write anything to disk if using
* nonpersistent superblocks
*/
if (!mddev->persistent) {
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ if (!mddev->external)
+ clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+
spin_unlock_irq(&mddev->write_lock);
wake_up(&mddev->sb_wait);
return;
}
+ sync_sbs(mddev, nospares);
spin_unlock_irq(&mddev->write_lock);
dprintk(KERN_INFO
slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
char *e;
+ int err;
+ char nm[20];
int slot = simple_strtoul(buf, &e, 10);
if (strncmp(buf, "none", 4)==0)
slot = -1;
else if (e==buf || (*e && *e!= '\n'))
return -EINVAL;
- if (rdev->mddev->pers)
- /* Cannot set slot in active array (yet) */
- return -EBUSY;
- if (slot >= rdev->mddev->raid_disks)
- return -ENOSPC;
- rdev->raid_disk = slot;
- /* assume it is working */
- rdev->flags = 0;
- set_bit(In_sync, &rdev->flags);
+ if (rdev->mddev->pers) {
+ /* Setting 'slot' on an active array requires also
+ * updating the 'rd%d' link, and communicating
+ * with the personality with ->hot_*_disk.
+ * For now we only support removing
+ * failed/spare devices. This normally happens automatically,
+ * but not when the metadata is externally managed.
+ */
+ if (slot != -1)
+ return -EBUSY;
+ if (rdev->raid_disk == -1)
+ return -EEXIST;
+ /* personality does all needed checks */
+ if (rdev->mddev->pers->hot_add_disk == NULL)
+ return -EINVAL;
+ err = rdev->mddev->pers->
+ hot_remove_disk(rdev->mddev, rdev->raid_disk);
+ if (err)
+ return err;
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ sysfs_remove_link(&rdev->mddev->kobj, nm);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+ } else {
+ if (slot >= rdev->mddev->raid_disks)
+ return -ENOSPC;
+ rdev->raid_disk = slot;
+ /* assume it is working */
+ rdev->flags = 0;
+ set_bit(In_sync, &rdev->flags);
+ }
return len;
}
case 0:
if (mddev->in_sync)
st = clean;
+ else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ st = write_pending;
else if (mddev->safemode)
st = active_idle;
else
break;
case clear:
/* stopping an active array */
- if (mddev->pers) {
- if (atomic_read(&mddev->active) > 1)
- return -EBUSY;
- err = do_md_stop(mddev, 0);
- }
+ if (atomic_read(&mddev->active) > 1)
+ return -EBUSY;
+ err = do_md_stop(mddev, 0);
break;
case inactive:
/* stopping an active array */
if (atomic_read(&mddev->active) > 1)
return -EBUSY;
err = do_md_stop(mddev, 2);
- }
+ } else
+ err = 0; /* already inactive */
break;
case suspended:
break; /* not supported yet */
restart_array(mddev);
spin_lock_irq(&mddev->write_lock);
if (atomic_read(&mddev->writes_pending) == 0) {
- mddev->in_sync = 1;
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
- }
+ if (mddev->in_sync == 0) {
+ mddev->in_sync = 1;
+ if (mddev->persistent)
+ set_bit(MD_CHANGE_CLEAN,
+ &mddev->flags);
+ }
+ err = 0;
+ } else
+ err = -EBUSY;
spin_unlock_irq(&mddev->write_lock);
} else {
mddev->ro = 0;
case active:
if (mddev->pers) {
restart_array(mddev);
- clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ if (mddev->external)
+ clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
wake_up(&mddev->sb_wait);
err = 0;
} else {
/* Metdata version.
- * This is either 'none' for arrays with externally managed metadata,
+ * This is one of
+ * 'none' for arrays with no metadata (good luck...)
+ * 'external' for arrays with externally managed metadata,
* or N.M for internally known formats
*/
static ssize_t
if (mddev->persistent)
return sprintf(page, "%d.%d\n",
mddev->major_version, mddev->minor_version);
+ else if (mddev->external)
+ return sprintf(page, "external:%s\n", mddev->metadata_type);
else
return sprintf(page, "none\n");
}
if (cmd_match(buf, "none")) {
mddev->persistent = 0;
+ mddev->external = 0;
+ mddev->major_version = 0;
+ mddev->minor_version = 90;
+ return len;
+ }
+ if (strncmp(buf, "external:", 9) == 0) {
+ int namelen = len-9;
+ if (namelen >= sizeof(mddev->metadata_type))
+ namelen = sizeof(mddev->metadata_type)-1;
+ strncpy(mddev->metadata_type, buf+9, namelen);
+ mddev->metadata_type[namelen] = 0;
+ if (namelen && mddev->metadata_type[namelen-1] == '\n')
+ mddev->metadata_type[--namelen] = 0;
+ mddev->persistent = 0;
+ mddev->external = 1;
mddev->major_version = 0;
mddev->minor_version = 90;
return len;
mddev->major_version = major;
mddev->minor_version = minor;
mddev->persistent = 1;
+ mddev->external = 0;
return len;
}
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
+static ssize_t
+max_sync_show(mddev_t *mddev, char *page)
+{
+ if (mddev->resync_max == MaxSector)
+ return sprintf(page, "max\n");
+ else
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->resync_max);
+}
+static ssize_t
+max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ if (strncmp(buf, "max", 3) == 0)
+ mddev->resync_max = MaxSector;
+ else {
+ char *ep;
+ unsigned long long max = simple_strtoull(buf, &ep, 10);
+ if (ep == buf || (*ep != 0 && *ep != '\n'))
+ return -EINVAL;
+ if (max < mddev->resync_max &&
+ test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return -EBUSY;
+
+ /* Must be a multiple of chunk_size */
+ if (mddev->chunk_size) {
+ if (max & (sector_t)((mddev->chunk_size>>9)-1))
+ return -EINVAL;
+ }
+ mddev->resync_max = max;
+ }
+ wake_up(&mddev->recovery_wait);
+ return len;
+}
+
+static struct md_sysfs_entry md_max_sync =
+__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
+
static ssize_t
suspend_lo_show(mddev_t *mddev, char *page)
{
&md_sync_max.attr,
&md_sync_speed.attr,
&md_sync_completed.attr,
+ &md_max_sync.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
&md_bitmap.attr,
/*
* Analyze all RAID superblock(s)
*/
- if (!mddev->raid_disks)
+ if (!mddev->raid_disks) {
+ if (!mddev->persistent)
+ return -EINVAL;
analyze_sbs(mddev);
+ }
chunk_size = mddev->chunk_size;
mddev->size = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
+ mddev->external = 0;
+ mddev->persistent = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
mddev_unlock(mddev);
} else {
printk(KERN_INFO "md: created %s\n", mdname(mddev));
+ mddev->persistent = 1;
ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev))
else
mddev->recovery_cp = 0;
mddev->persistent = ! info->not_persistent;
+ mddev->external = 0;
mddev->layout = info->layout;
mddev->chunk_size = info->chunk_size;
mddev->max_disks = MD_SB_DISKS;
- mddev->flags = 0;
+ if (mddev->persistent)
+ mddev->flags = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
mddev->major_version,
mddev->minor_version);
}
- } else
+ } else if (mddev->external)
+ seq_printf(seq, " super external:%s",
+ mddev->metadata_type);
+ else
seq_printf(seq, " super non-persistent");
if (mddev->pers) {
sector_t sectors;
skipped = 0;
+ if (j >= mddev->resync_max) {
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ wait_event(mddev->recovery_wait,
+ mddev->resync_max > j
+ || kthread_should_stop());
+ }
+ if (kthread_should_stop())
+ goto interrupted;
sectors = mddev->pers->sync_request(mddev, j, &skipped,
- currspeed < speed_min(mddev));
+ currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out;
}
- if (kthread_should_stop()) {
- /*
- * got a signal, exit.
- */
- printk(KERN_INFO
- "md: md_do_sync() got signal ... exiting\n");
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- goto out;
- }
+ if (kthread_should_stop())
+ goto interrupted;
+
/*
* this loop exits only if either when we are slower than
skip:
mddev->curr_resync = 0;
+ mddev->resync_max = MaxSector;
+ sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ return;
+
+ interrupted:
+ /*
+ * got a signal, exit.
+ */
+ printk(KERN_INFO
+ "md: md_do_sync() got signal ... exiting\n");
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+ goto out;
+
}
EXPORT_SYMBOL_GPL(md_do_sync);
ITERATE_RDEV(mddev,rdev,rtmp)
if (rdev->raid_disk >= 0 &&
+ !mddev->external &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
}
if ( ! (
- mddev->flags ||
+ (mddev->flags && !mddev->external) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
(mddev->safemode == 1) ||
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1;
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ if (mddev->persistent)
+ set_bit(MD_CHANGE_CLEAN, &mddev->flags);
}
if (mddev->safemode == 1)
mddev->safemode = 0;