int md_run()

in md.c [5793:6066]


int md_run(struct mddev *mddev)
{
	int err;
	struct md_rdev *rdev;
	struct md_personality *pers;
	bool nowait = true;

	if (list_empty(&mddev->disks))
		/* cannot run an array with no devices.. */
		return -EINVAL;

	if (mddev->pers)
		return -EBUSY;
	/* Cannot run until previous stop completes properly */
	if (mddev->sysfs_active)
		return -EBUSY;

	/*
	 * Analyze all RAID superblock(s)
	 */
	if (!mddev->raid_disks) {
		if (!mddev->persistent)
			return -EINVAL;
		err = analyze_sbs(mddev);
		if (err)
			return -EINVAL;
	}

	if (mddev->level != LEVEL_NONE)
		request_module("md-level-%d", mddev->level);
	else if (mddev->clevel[0])
		request_module("md-%s", mddev->clevel);

	/*
	 * Drop all container device buffers, from now on
	 * the only valid external interface is through the md
	 * device.
	 */
	mddev->has_superblocks = false;
	rdev_for_each(rdev, mddev) {
		if (test_bit(Faulty, &rdev->flags))
			continue;
		sync_blockdev(rdev->bdev);
		invalidate_bdev(rdev->bdev);
		if (mddev->ro != 1 && rdev_read_only(rdev)) {
			mddev->ro = 1;
			if (mddev->gendisk)
				set_disk_ro(mddev->gendisk, 1);
		}

		if (rdev->sb_page)
			mddev->has_superblocks = true;

		/* perform some consistency tests on the device.
		 * We don't want the data to overlap the metadata,
		 * Internal Bitmap issues have been handled elsewhere.
		 */
		if (rdev->meta_bdev) {
			/* Nothing to check */;
		} else if (rdev->data_offset < rdev->sb_start) {
			if (mddev->dev_sectors &&
			    rdev->data_offset + mddev->dev_sectors
			    > rdev->sb_start) {
				pr_warn("md: %s: data overlaps metadata\n",
					mdname(mddev));
				return -EINVAL;
			}
		} else {
			if (rdev->sb_start + rdev->sb_size/512
			    > rdev->data_offset) {
				pr_warn("md: %s: metadata overlaps data\n",
					mdname(mddev));
				return -EINVAL;
			}
		}
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev));
	}

	/* Set the NOWAIT flags if all underlying devices support it */
	if (nowait)
		blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue);

	if (!bioset_initialized(&mddev->bio_set)) {
		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			return err;
	}
	if (!bioset_initialized(&mddev->sync_set)) {
		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
		if (err)
			goto exit_bio_set;
	}

	spin_lock(&pers_lock);
	pers = find_pers(mddev->level, mddev->clevel);
	if (!pers || !try_module_get(pers->owner)) {
		spin_unlock(&pers_lock);
		if (mddev->level != LEVEL_NONE)
			pr_warn("md: personality for level %d is not loaded!\n",
				mddev->level);
		else
			pr_warn("md: personality for level %s is not loaded!\n",
				mddev->clevel);
		err = -EINVAL;
		goto abort;
	}
	spin_unlock(&pers_lock);
	if (mddev->level != pers->level) {
		mddev->level = pers->level;
		mddev->new_level = pers->level;
	}
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
		/* This personality cannot handle reshaping... */
		module_put(pers->owner);
		err = -EINVAL;
		goto abort;
	}

	if (pers->sync_request) {
		/* Warn if this is a potentially silly
		 * configuration.
		 */
		char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
		struct md_rdev *rdev2;
		int warned = 0;

		rdev_for_each(rdev, mddev)
			rdev_for_each(rdev2, mddev) {
				if (rdev < rdev2 &&
				    rdev->bdev->bd_disk ==
				    rdev2->bdev->bd_disk) {
					pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
						mdname(mddev),
						bdevname(rdev->bdev,b),
						bdevname(rdev2->bdev,b2));
					warned = 1;
				}
			}

		if (warned)
			pr_warn("True protection against single-disk failure might be compromised.\n");
	}

	mddev->recovery = 0;
	/* may be over-ridden by personality */
	mddev->resync_max_sectors = mddev->dev_sectors;

	mddev->ok_start_degraded = start_dirty_degraded;

	if (start_readonly && mddev->ro == 0)
		mddev->ro = 2; /* read-only, but switch on first write */

	err = pers->run(mddev);
	if (err)
		pr_warn("md: pers->run() failed ...\n");
	else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
		WARN_ONCE(!mddev->external_size,
			  "%s: default size too small, but 'external_size' not in effect?\n",
			  __func__);
		pr_warn("md: invalid array_size %llu > default size %llu\n",
			(unsigned long long)mddev->array_sectors / 2,
			(unsigned long long)pers->size(mddev, 0, 0) / 2);
		err = -EINVAL;
	}
	if (err == 0 && pers->sync_request &&
	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
		struct bitmap *bitmap;

		bitmap = md_bitmap_create(mddev, -1);
		if (IS_ERR(bitmap)) {
			err = PTR_ERR(bitmap);
			pr_warn("%s: failed to create bitmap (%d)\n",
				mdname(mddev), err);
		} else
			mddev->bitmap = bitmap;

	}
	if (err)
		goto bitmap_abort;

	if (mddev->bitmap_info.max_write_behind > 0) {
		bool create_pool = false;

		rdev_for_each(rdev, mddev) {
			if (test_bit(WriteMostly, &rdev->flags) &&
			    rdev_init_serial(rdev))
				create_pool = true;
		}
		if (create_pool && mddev->serial_info_pool == NULL) {
			mddev->serial_info_pool =
				mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
						    sizeof(struct serial_info));
			if (!mddev->serial_info_pool) {
				err = -ENOMEM;
				goto bitmap_abort;
			}
		}
	}

	if (mddev->queue) {
		bool nonrot = true;

		rdev_for_each(rdev, mddev) {
			if (rdev->raid_disk >= 0 &&
			    !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) {
				nonrot = false;
				break;
			}
		}
		if (mddev->degraded)
			nonrot = false;
		if (nonrot)
			blk_queue_flag_set(QUEUE_FLAG_NONROT, mddev->queue);
		else
			blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue);
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue);
	}
	if (pers->sync_request) {
		if (mddev->kobj.sd &&
		    sysfs_create_group(&mddev->kobj, &md_redundancy_group))
			pr_warn("md: cannot register extra attributes for %s\n",
				mdname(mddev));
		mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
		mddev->sysfs_completed = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_completed");
		mddev->sysfs_degraded = sysfs_get_dirent_safe(mddev->kobj.sd, "degraded");
	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
		mddev->ro = 0;

	atomic_set(&mddev->max_corr_read_errors,
		   MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
	mddev->safemode = 0;
	if (mddev_is_clustered(mddev))
		mddev->safemode_delay = 0;
	else
		mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
	mddev->in_sync = 1;
	smp_wmb();
	spin_lock(&mddev->lock);
	mddev->pers = pers;
	spin_unlock(&mddev->lock);
	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk >= 0)
			sysfs_link_rdev(mddev, rdev); /* failure here is OK */

	if (mddev->degraded && !mddev->ro)
		/* This ensures that recovering status is reported immediately
		 * via sysfs - until a lack of spares is confirmed.
		 */
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);

	if (mddev->sb_flags)
		md_update_sb(mddev, 0);

	md_new_event();
	return 0;

bitmap_abort:
	mddev_detach(mddev);
	if (mddev->private)
		pers->free(mddev, mddev->private);
	mddev->private = NULL;
	module_put(pers->owner);
	md_bitmap_destroy(mddev);
abort:
	bioset_exit(&mddev->sync_set);
exit_bio_set:
	bioset_exit(&mddev->bio_set);
	return err;
}