in md.c [2669:2860]
void md_update_sb(struct mddev *mddev, int force_change)
{
struct md_rdev *rdev;
int sync_req;
int nospares = 0;
int any_badblocks_changed = 0;
int ret = -1;
if (mddev->ro) {
if (force_change)
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
}
repeat:
if (mddev_is_clustered(mddev)) {
if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
nospares = 1;
ret = md_cluster_ops->metadata_update_start(mddev);
/* Has someone else has updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
md_cluster_ops->metadata_update_cancel(mddev);
bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
BIT(MD_SB_CHANGE_DEVS) |
BIT(MD_SB_CHANGE_CLEAN));
return;
}
}
/*
* First make sure individual recovery_offsets are correct
* curr_resync_completed can only be used during recovery.
* During reshape/resync it might use array-addresses rather
* that device addresses.
*/
rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
mddev->delta_disks >= 0 &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
!test_bit(Journal, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
mddev->curr_resync_completed > rdev->recovery_offset)
rdev->recovery_offset = mddev->curr_resync_completed;
}
if (!mddev->persistent) {
clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (!mddev->external) {
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
rdev_for_each(rdev, mddev) {
if (rdev->badblocks.changed) {
rdev->badblocks.changed = 0;
ack_all_badblocks(&rdev->badblocks);
md_error(mddev, rdev);
}
clear_bit(Blocked, &rdev->flags);
clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
}
}
wake_up(&mddev->sb_wait);
return;
}
spin_lock(&mddev->lock);
mddev->utime = ktime_get_real_seconds();
if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
/* just a clean<-> dirty transition, possibly leave spares alone,
* though if events isn't the right even/odd, we will have to do
* spares after all
*/
nospares = 1;
if (force_change)
nospares = 0;
if (mddev->degraded)
/* If the array is degraded, then skipping spares is both
* dangerous and fairly pointless.
* Dangerous because a device that was removed from the array
* might have a event_count that still looks up-to-date,
* so it can be re-added without a resync.
* Pointless because if there are any spares to skip,
* then a recovery will happen and soon that array won't
* be degraded any more and the spare can go back to sleep then.
*/
nospares = 0;
sync_req = mddev->in_sync;
/* If this is just a dirty<->clean transition, and the array is clean
* and 'events' is odd, we can roll back to the previous clean state */
if (nospares
&& (mddev->in_sync && mddev->recovery_cp == MaxSector)
&& mddev->can_decrease_events
&& mddev->events != 1) {
mddev->events--;
mddev->can_decrease_events = 0;
} else {
/* otherwise we have to go forward and ... */
mddev->events ++;
mddev->can_decrease_events = nospares;
}
/*
* This 64-bit counter should never wrap.
* Either we are in around ~1 trillion A.C., assuming
* 1 reboot per second, or we have a bug...
*/
WARN_ON(mddev->events == 0);
rdev_for_each(rdev, mddev) {
if (rdev->badblocks.changed)
any_badblocks_changed++;
if (test_bit(Faulty, &rdev->flags))
set_bit(FaultRecorded, &rdev->flags);
}
sync_sbs(mddev, nospares);
spin_unlock(&mddev->lock);
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev), mddev->in_sync);
if (mddev->queue)
blk_add_trace_msg(mddev->queue, "md md_update_sb");
rewrite:
md_bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
if (rdev->sb_loaded != 1)
continue; /* no noise on spare devices */
if (!test_bit(Faulty, &rdev->flags)) {
md_super_write(mddev,rdev,
rdev->sb_start, rdev->sb_size,
rdev->sb_page);
pr_debug("md: (write) %s's sb offset: %llu\n",
bdevname(rdev->bdev, b),
(unsigned long long)rdev->sb_start);
rdev->sb_events = mddev->events;
if (rdev->badblocks.size) {
md_super_write(mddev, rdev,
rdev->badblocks.sector,
rdev->badblocks.size << 9,
rdev->bb_page);
rdev->badblocks.size = 0;
}
} else
pr_debug("md: %s (skipping faulty)\n",
bdevname(rdev->bdev, b));
if (mddev->level == LEVEL_MULTIPATH)
/* only need to write one superblock... */
break;
}
if (md_super_wait(mddev) < 0)
goto rewrite;
/* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
if (mddev_is_clustered(mddev) && ret == 0)
md_cluster_ops->metadata_update_finish(mddev);
if (mddev->in_sync != sync_req ||
!bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
/* have to write it out again */
goto repeat;
wake_up(&mddev->sb_wait);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
sysfs_notify_dirent_safe(mddev->sysfs_completed);
rdev_for_each(rdev, mddev) {
if (test_and_clear_bit(FaultRecorded, &rdev->flags))
clear_bit(Blocked, &rdev->flags);
if (any_badblocks_changed)
ack_all_badblocks(&rdev->badblocks);
clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
}
}