in md.c [5793:6066]
/*
 * md_run() - validate an assembled array and start its personality.
 * @mddev: the array to start.
 *
 * Steps, in order:
 *   1. refuse to run an empty, already-running or still-stopping array;
 *   2. analyze the superblocks if raid_disks has not been set yet;
 *   3. flush/invalidate every non-Faulty member device and check that
 *      data and metadata do not overlap;
 *   4. set up the two biosets, look up the personality and take a
 *      reference on its module;
 *   5. call pers->run(), validate the resulting size and create the
 *      bitmap if one is configured;
 *   6. set up queue flags and sysfs entries, publish mddev->pers and
 *      flag that recovery needs to be considered.
 *
 * Return: 0 on success.  -EINVAL for an empty array, a non-persistent
 * array without raid_disks, a superblock analysis failure, overlapping
 * data/metadata, a missing personality, a pending reshape the personality
 * cannot handle, or an array size larger than the personality provides.
 * -EBUSY if a personality is already attached or sysfs_active is set.
 * -ENOMEM if the serial info pool cannot be allocated.  Errors from
 * bioset_init(), pers->run() and md_bitmap_create() are passed through.
 */
int md_run(struct mddev *mddev)
{
	int err;
	struct md_rdev *rdev;
	struct md_personality *pers;
	bool nowait = true;

	if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
		return -EINVAL;

	if (mddev->pers)
		return -EBUSY;
	/* Cannot run until previous stop completes properly */
	if (mddev->sysfs_active)
		return -EBUSY;

	/*
	 * Analyze all RAID superblock(s)
	 */
	if (!mddev->raid_disks) {
		if (!mddev->persistent)
			return -EINVAL;
		err = analyze_sbs(mddev);
		if (err)
			return -EINVAL;
	}

	/* Try to load the personality module, by number or by name. */
	if (mddev->level != LEVEL_NONE)
		request_module("md-level-%d", mddev->level);
	else if (mddev->clevel[0])
		request_module("md-%s", mddev->clevel);

	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
	mddev->has_superblocks = false;
	rdev_for_each(rdev, mddev) {
		if (test_bit(Faulty, &rdev->flags))
			continue;
		sync_blockdev(rdev->bdev);
		invalidate_bdev(rdev->bdev);
		/* A read-only member forces the whole array read-only. */
		if (mddev->ro != 1 && rdev_read_only(rdev)) {
			mddev->ro = 1;
			if (mddev->gendisk)
				set_disk_ro(mddev->gendisk, 1);
		}

		if (rdev->sb_page)
			mddev->has_superblocks = true;

		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to check */;
		} else if (rdev->data_offset < rdev->sb_start) {
			if (mddev->dev_sectors &&
			    rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start) {
				pr_warn("md: %s: data overlaps metadata\n",
					mdname(mddev));
				return -EINVAL;
			}
		} else {
			if (rdev->sb_start + rdev->sb_size/512
			    > rdev->data_offset) {
				pr_warn("md: %s: metadata overlaps data\n",
					mdname(mddev));
				return -EINVAL;
			}
		}
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		/* nowait stays true only if every checked member supports it */
		nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev));
	}

	if (!bioset_initialized(&mddev->bio_set)) {
		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			return err;
	}
	if (!bioset_initialized(&mddev->sync_set)) {
		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			goto exit_bio_set;
	}

	/* Look up the personality and pin its module under pers_lock. */
	spin_lock(&pers_lock);
	pers = find_pers(mddev->level, mddev->clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		if (mddev->level != LEVEL_NONE)
			pr_warn("md: personality for level %d is not loaded!\n",
				mddev->level);
		else
			pr_warn("md: personality for level %s is not loaded!\n",
				mddev->clevel);
		err = -EINVAL;
		goto abort;
	}
	spin_unlock(&pers_lock);
	if (mddev->level != pers->level) {
		mddev->level = pers->level;
		mddev->new_level = pers->level;
	}
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
		module_put(pers->owner);
		err = -EINVAL;
		goto abort;
	}

	if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
		char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
		struct md_rdev *rdev2;
		int warned = 0;

		/* "rdev < rdev2" visits each unordered pair exactly once. */
		rdev_for_each(rdev, mddev)
			rdev_for_each(rdev2, mddev) {
				if (rdev < rdev2 &&
				    rdev->bdev->bd_disk ==
				    rdev2->bdev->bd_disk) {
					pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
						mdname(mddev),
						bdevname(rdev->bdev,b),
						bdevname(rdev2->bdev,b2));
					warned = 1;
				}
			}

		if (warned)
			pr_warn("True protection against single-disk failure might be compromised.\n");
	}

	mddev->recovery = 0;
	/* may be over-ridden by personality */
	mddev->resync_max_sectors = mddev->dev_sectors;

	mddev->ok_start_degraded = start_dirty_degraded;

	if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */

	err = pers->run(mddev);
	if (err)
		pr_warn("md: pers->run() failed ...\n");
	else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
		WARN_ONCE(!mddev->external_size,
			  "%s: default size too small, but 'external_size' not in effect?\n",
			  __func__);
		pr_warn("md: invalid array_size %llu > default size %llu\n",
			(unsigned long long)mddev->array_sectors / 2,
			(unsigned long long)pers->size(mddev, 0, 0) / 2);
		err = -EINVAL;
	}
	if (err == 0 && pers->sync_request &&
	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
		struct bitmap *bitmap;

		bitmap = md_bitmap_create(mddev, -1);
		if (IS_ERR(bitmap)) {
			err = PTR_ERR(bitmap);
			pr_warn("%s: failed to create bitmap (%d)\n",
				mdname(mddev), err);
		} else
			mddev->bitmap = bitmap;

	}
	if (err)
		goto bitmap_abort;

	if (mddev->bitmap_info.max_write_behind > 0) {
		bool create_pool = false;

		rdev_for_each(rdev, mddev) {
			if (test_bit(WriteMostly, &rdev->flags) &&
			    rdev_init_serial(rdev))
				create_pool = true;
		}
		if (create_pool && mddev->serial_info_pool == NULL) {
			mddev->serial_info_pool =
				mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
			if (!mddev->serial_info_pool) {
				err = -ENOMEM;
				goto bitmap_abort;
			}
		}
	}

	/*
	 * mddev->queue may be NULL, so every queue flag update must stay
	 * inside this check.
	 */
	if (mddev->queue) {
		bool nonrot = true;

		rdev_for_each(rdev, mddev) {
			if (rdev->raid_disk >= 0 &&
			    !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
				nonrot = false;
				break;
			}
		}
		if (mddev->degraded)
			nonrot = false;
		if (nonrot)
			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
		else
			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);

		/* Set the NOWAIT flags if all underlying devices support it */
		if (nowait)
			blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);
	}
	if (pers->sync_request) {
		if (mddev->kobj.sd &&
		    sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			pr_warn("md: cannot register extra attributes for %s\n",
				mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
		mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
		mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
		mddev->ro = 0;

	atomic_set(&mddev->max_corr_read_errors,
		   MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
	mddev->safemode = 0;
	if (mddev_is_clustered(mddev))
		mddev->safemode_delay = 0;
	else
		mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
	mddev->in_sync = 1;
	/* Order the stores above before mddev->pers is published below. */
	smp_wmb();
	spin_lock(&mddev->lock);
	mddev->pers = pers;
	spin_unlock(&mddev->lock);
	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk >= 0)
			sysfs_link_rdev(mddev, rdev); /* failure here is OK */

	if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

	if (mddev->sb_flags)
		md_update_sb(mddev, 0);

	md_new_event();
	return 0;

	/* Error unwinding: each label undoes what was set up before its goto. */
bitmap_abort:
	mddev_detach(mddev);
	if (mddev->private)
		pers->free(mddev, mddev->private);
	mddev->private = NULL;
	module_put(pers->owner);
	md_bitmap_destroy(mddev);
abort:
	bioset_exit(&mddev->sync_set);
exit_bio_set:
	bioset_exit(&mddev->bio_set);
	return err;
}