in raid10.c [4510:4695]
/*
 * raid10_start_reshape() - validate and kick off a requested reshape.
 * @mddev: array whose pending ('new') layout is to be committed.
 *
 * Return: 0 once the "reshape" sync thread has been registered and woken.
 * -EBUSY if MD_RECOVERY_RUNNING is already set.  -EINVAL if setup_geo()
 * does not yield conf->copies, if the per-device data_offset movement is
 * smaller than the larger of the old/new (chunk * far_copies) lengths, if
 * there are fewer spares than delta_disks, or if a backwards reshape would
 * leave raid10_size() below mddev->array_sectors.  -EAGAIN if the sync
 * thread cannot be registered.  Otherwise the error returned by
 * md_bitmap_resize() or md_cluster_ops->resize_bitmaps().
 *
 * Every failure after conf->geo has been overwritten restores it from
 * conf->prev before returning.
 */
static int raid10_start_reshape(struct mddev *mddev)
{
	/* A 'reshape' has been requested. This commits
	 * the various 'new' fields and sets MD_RECOVER_RESHAPE
	 * This also checks if there are enough spares and adds them
	 * to the array.
	 * We currently require enough spares to make the final
	 * array non-degraded. We also require that the difference
	 * between old and new data_offset - on each device - is
	 * enough that we never risk over-writing.
	 */
	unsigned long before_length, after_length;
	sector_t min_offset_diff = 0;
	int first = 1;
	struct geom new;
	struct r10conf *conf = mddev->private;
	struct md_rdev *rdev;
	int spares = 0;
	int ret;

	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		return -EBUSY;

	/* Dry run into a scratch geometry; nothing is committed yet. */
	if (setup_geo(&new, mddev, geo_start) != conf->copies)
		return -EINVAL;

	before_length = ((1 << conf->prev.chunk_shift) *
			 conf->prev.far_copies);
	after_length = ((1 << conf->geo.chunk_shift) *
			conf->geo.far_copies);

	/*
	 * Count devices that are neither In_sync nor Faulty as spares, and
	 * find the smallest data_offset movement (taken in the direction of
	 * the reshape, negative values clamped to 0) among devices that
	 * currently hold a slot.
	 */
	rdev_for_each(rdev, mddev) {
		if (!test_bit(In_sync, &rdev->flags)
		    && !test_bit(Faulty, &rdev->flags))
			spares++;
		if (rdev->raid_disk >= 0) {
			long long diff = (rdev->new_data_offset
					  - rdev->data_offset);
			if (!mddev->reshape_backwards)
				diff = -diff;
			if (diff < 0)
				diff = 0;
			if (first || diff < min_offset_diff)
				min_offset_diff = diff;
			first = 0;
		}
	}

	if (max(before_length, after_length) > min_offset_diff)
		return -EINVAL;

	if (spares < mddev->delta_disks)
		return -EINVAL;

	conf->offset_diff = min_offset_diff;
	spin_lock_irq(&conf->device_lock);
	if (conf->mirrors_new) {
		/*
		 * Switch to the pre-allocated mirrors array: copy the
		 * prev.raid_disks existing entries across, then publish it.
		 * The array being replaced is parked in mirrors_old after
		 * whatever was parked there before has been freed.
		 */
		memcpy(conf->mirrors_new, conf->mirrors,
		       sizeof(struct raid10_info)*conf->prev.raid_disks);
		smp_mb();
		kfree(conf->mirrors_old);
		conf->mirrors_old = conf->mirrors;
		conf->mirrors = conf->mirrors_new;
		conf->mirrors_new = NULL;
	}
	/* From here on conf->geo holds the new geometry. */
	setup_geo(&conf->geo, mddev, geo_start);
	smp_mb();
	if (mddev->reshape_backwards) {
		sector_t size = raid10_size(mddev, 0, 0);
		if (size < mddev->array_sectors) {
			/*
			 * conf->geo was overwritten just above.  Put the
			 * previous geometry back, as the abort path does,
			 * so that a rejected request does not leave the
			 * new layout in place with no reshape running.
			 */
			conf->geo = conf->prev;
			spin_unlock_irq(&conf->device_lock);
			pr_warn("md/raid10:%s: array size must be reduce before number of disks\n",
				mdname(mddev));
			return -EINVAL;
		}
		mddev->resync_max_sectors = size;
		conf->reshape_progress = size;
	} else
		conf->reshape_progress = 0;
	conf->reshape_safe = conf->reshape_progress;
	spin_unlock_irq(&conf->device_lock);

	if (mddev->delta_disks && mddev->bitmap) {
		struct mdp_superblock_1 *sb = NULL;
		sector_t oldsize, newsize;

		oldsize = raid10_size(mddev, 0, 0);
		newsize = raid10_size(mddev, 0, conf->geo.raid_disks);

		if (!mddev_is_clustered(mddev)) {
			/* Not clustered: only the local bitmap needs resizing. */
			ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
			if (ret)
				goto abort;
			goto out;
		}

		/*
		 * Clustered: look at a superblock page of a non-Faulty
		 * device that holds a slot (the last such device iterated).
		 */
		rdev_for_each(rdev, mddev) {
			if (rdev->raid_disk > -1 &&
			    !test_bit(Faulty, &rdev->flags))
				sb = page_address(rdev->sb_page);
		}

		/*
		 * some node is already performing reshape, and no need to
		 * call md_bitmap_resize again since it should be called when
		 * receiving BITMAP_RESIZE msg
		 */
		if ((sb && (le32_to_cpu(sb->feature_map) &
			    MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
			goto out;

		ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
		if (ret)
			goto abort;

		ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
		if (ret) {
			/* Undo the local resize before giving up. */
			md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
			goto abort;
		}
	}
out:
	if (mddev->delta_disks > 0) {
		rdev_for_each(rdev, mddev) {
			if (rdev->raid_disk < 0 &&
			    !test_bit(Faulty, &rdev->flags)) {
				if (raid10_add_disk(mddev, rdev) == 0) {
					/*
					 * A slot index at or beyond the old
					 * disk count is marked In_sync;
					 * otherwise recovery starts from 0.
					 */
					if (rdev->raid_disk >=
					    conf->prev.raid_disks)
						set_bit(In_sync, &rdev->flags);
					else
						rdev->recovery_offset = 0;

					/* Failure here is OK */
					sysfs_link_rdev(mddev, rdev);
				}
			} else if (rdev->raid_disk >= conf->prev.raid_disks
				   && !test_bit(Faulty, &rdev->flags)) {
				/* This is a spare that was manually added */
				set_bit(In_sync, &rdev->flags);
			}
		}
	}
	/* When a reshape changes the number of devices,
	 * ->degraded is measured against the larger of the
	 * pre and post numbers.
	 */
	spin_lock_irq(&conf->device_lock);
	mddev->degraded = calc_degraded(conf);
	spin_unlock_irq(&conf->device_lock);
	mddev->raid_disks = conf->geo.raid_disks;
	mddev->reshape_position = conf->reshape_progress;
	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);

	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);

	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
						"reshape");
	if (!mddev->sync_thread) {
		ret = -EAGAIN;
		goto abort;
	}
	conf->reshape_checkpoint = jiffies;
	md_wakeup_thread(mddev->sync_thread);
	md_new_event();
	return 0;

abort:
	/*
	 * Roll back: clear all recovery flags, restore the previous
	 * geometry and disk count, drop the pending data_offset change on
	 * every device, and mark that no reshape is in progress.
	 */
	mddev->recovery = 0;
	spin_lock_irq(&conf->device_lock);
	conf->geo = conf->prev;
	mddev->raid_disks = conf->geo.raid_disks;
	rdev_for_each(rdev, mddev)
		rdev->new_data_offset = rdev->data_offset;
	smp_wmb();
	conf->reshape_progress = MaxSector;
	conf->reshape_safe = MaxSector;
	mddev->reshape_position = MaxSector;
	spin_unlock_irq(&conf->device_lock);
	return ret;
}