in raid5.c [4584:4777]
/*
 * analyse_stripe - summarise the current state of stripe @sh into @s.
 *
 * @sh: stripe to inspect.  The per-device flag words sh->dev[i].flags are
 *      both read and updated by this function.
 * @s:  result structure.  It is zeroed on entry and then filled in.
 *
 * The function walks every device slot of the stripe (from index
 * sh->disks - 1 down to 0) and, for each slot:
 *   - counts locked / uptodate / compute / fill / read / write / written /
 *     journal states into @s;
 *   - picks the md_rdev to consider for the slot (the replacement if it is
 *     usable for reads, otherwise conf->disks[i].rdev) and records that
 *     choice in R5_ReadRepl / R5_NeedReplace;
 *   - recomputes R5_Insync and, for slots that end up not in sync, records
 *     them in s->failed / s->failed_num[];
 *   - notes pending bad-block work in s->handle_bad_blocks.
 * After the loop, if STRIPE_SYNCING is set on the stripe, it chooses between
 * s->syncing and s->replacing.
 *
 * Side effects beyond @s and dev->flags:
 *   - may set BlockedBadBlocks in an rdev's flags;
 *   - increments rdev->nr_pending once for the rdev stored in
 *     s->blocked_rdev, and once per usable rdev for each of R5_WriteError,
 *     R5_MadeGood and R5_MadeGoodRepl found set on a device.
 *     NOTE(review): the matching decrements are not in this function;
 *     presumably the caller drops them - confirm.
 */
static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
{
struct r5conf *conf = sh->raid_conf;
int disks = sh->disks;
struct r5dev *dev;
int i;
/* Set when a failed slot still has a non-Faulty rdev or replacement. */
int do_recovery = 0;
memset(s, 0, sizeof(*s));
/* Expand state is only reported when the stripe has no batch_head. */
s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head;
s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
/* -1 marks "no failed device recorded in this entry". */
s->failed_num[0] = -1;
s->failed_num[1] = -1;
s->log_failed = r5l_log_disk_error(conf);
/* Now to look around and see what can be done */
/* The rdev pointers below are fetched with rcu_dereference(), so the
 * whole scan runs inside an RCU read-side critical section.
 */
rcu_read_lock();
/* Iterates i = disks-1 ... 0, so failed_num[0] ends up holding the
 * highest-numbered failed slot.
 */
for (i=disks; i--; ) {
struct md_rdev *rdev;
sector_t first_bad;
int bad_sectors;
int is_bad = 0;
dev = &sh->dev[i];
pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
i, dev->flags,
dev->toread, dev->towrite, dev->written);
/* maybe we can reply to a read
*
* new wantfill requests are only permitted while
* ops_complete_biofill is guaranteed to be inactive
*/
if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
!test_bit(STRIPE_BIOFILL_RUN, &sh->state))
set_bit(R5_Wantfill, &dev->flags);
/* now count some things */
if (test_bit(R5_LOCKED, &dev->flags))
s->locked++;
if (test_bit(R5_UPTODATE, &dev->flags))
s->uptodate++;
if (test_bit(R5_Wantcompute, &dev->flags)) {
s->compute++;
/* More than two devices flagged for compute is treated as a bug. */
BUG_ON(s->compute > 2);
}
/* A device flagged Wantfill is counted in to_fill and not in to_read. */
if (test_bit(R5_Wantfill, &dev->flags))
s->to_fill++;
else if (dev->toread)
s->to_read++;
if (dev->towrite) {
s->to_write++;
/* Pending writes without R5_OVERWRITE are counted separately. */
if (!test_bit(R5_OVERWRITE, &dev->flags))
s->non_overwrite++;
}
if (dev->written)
s->written++;
/* Prefer to use the replacement for reads, but only
* if it is recovered enough and has no bad blocks.
*/
rdev = rcu_dereference(conf->disks[i].replacement);
if (rdev && !test_bit(Faulty, &rdev->flags) &&
rdev->recovery_offset >= sh->sector + RAID5_STRIPE_SECTORS(conf) &&
!is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
&first_bad, &bad_sectors))
set_bit(R5_ReadRepl, &dev->flags);
else {
/* A non-Faulty replacement exists but failed the read test
 * above (recovery_offset too low, or bad blocks in this
 * range): flag the slot as R5_NeedReplace.  Otherwise make
 * sure the flag is clear.
 */
if (rdev && !test_bit(Faulty, &rdev->flags))
set_bit(R5_NeedReplace, &dev->flags);
else
clear_bit(R5_NeedReplace, &dev->flags);
/* Fall back to the primary device for the rest of the checks. */
rdev = rcu_dereference(conf->disks[i].rdev);
clear_bit(R5_ReadRepl, &dev->flags);
}
/* From here on a Faulty device is handled exactly like a missing one. */
if (rdev && test_bit(Faulty, &rdev->flags))
rdev = NULL;
if (rdev) {
is_bad = is_badblock(rdev, sh->sector, RAID5_STRIPE_SECTORS(conf),
&first_bad, &bad_sectors);
/* Record only the first blocking rdev found, and take an
 * nr_pending reference on it.  A negative is_badblock()
 * result is handled like the Blocked flag and additionally
 * sets BlockedBadBlocks.
 * NOTE(review): presumably negative means the bad blocks are
 * not yet acknowledged - confirm against is_badblock().
 */
if (s->blocked_rdev == NULL
&& (test_bit(Blocked, &rdev->flags)
|| is_bad < 0)) {
if (is_bad < 0)
set_bit(BlockedBadBlocks,
&rdev->flags);
s->blocked_rdev = rdev;
atomic_inc(&rdev->nr_pending);
}
}
/* Recompute R5_Insync from scratch: clear it, then set it again
 * only if one of the cases below applies.
 */
clear_bit(R5_Insync, &dev->flags);
if (!rdev)
/* Not in-sync */;
else if (is_bad) {
/* also not in-sync */
if (!test_bit(WriteErrorSeen, &rdev->flags) &&
test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error
* which we can now try to correct
*/
set_bit(R5_Insync, &dev->flags);
set_bit(R5_ReadError, &dev->flags);
}
} else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
/* in sync if before recovery_offset */
set_bit(R5_Insync, &dev->flags);
else if (test_bit(R5_UPTODATE, &dev->flags) &&
test_bit(R5_Expanded, &dev->flags))
/* If we've reshaped into here, we assume it is Insync.
* We will shortly update recovery_offset to make
* it official.
*/
set_bit(R5_Insync, &dev->flags);
if (test_bit(R5_WriteError, &dev->flags)) {
/* This flag does not apply to '.replacement'
* only to .rdev, so make sure to check that*/
struct md_rdev *rdev2 = rcu_dereference(
conf->disks[i].rdev);
/* Only drop Insync when the device chosen above is the
 * primary itself, i.e. the one the write error refers to.
 */
if (rdev2 == rdev)
clear_bit(R5_Insync, &dev->flags);
/* Usable primary: request bad-block handling and take a
 * reference.  Otherwise the flag is stale, so drop it.
 */
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
s->handle_bad_blocks = 1;
atomic_inc(&rdev2->nr_pending);
} else
clear_bit(R5_WriteError, &dev->flags);
}
if (test_bit(R5_MadeGood, &dev->flags)) {
/* This flag does not apply to '.replacement'
* only to .rdev, so make sure to check that*/
struct md_rdev *rdev2 = rcu_dereference(
conf->disks[i].rdev);
/* Same pattern as R5_WriteError, minus the Insync change. */
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
s->handle_bad_blocks = 1;
atomic_inc(&rdev2->nr_pending);
} else
clear_bit(R5_MadeGood, &dev->flags);
}
if (test_bit(R5_MadeGoodRepl, &dev->flags)) {
/* Counterpart of R5_MadeGood, checked against the
 * replacement device instead of the primary.
 */
struct md_rdev *rdev2 = rcu_dereference(
conf->disks[i].replacement);
if (rdev2 && !test_bit(Faulty, &rdev2->flags)) {
s->handle_bad_blocks = 1;
atomic_inc(&rdev2->nr_pending);
} else
clear_bit(R5_MadeGoodRepl, &dev->flags);
}
if (!test_bit(R5_Insync, &dev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
clear_bit(R5_ReWrite, &dev->flags);
}
/* If R5_ReadError survived the step above, clear R5_Insync so the
 * slot is counted as failed by the test that follows.
 */
if (test_bit(R5_ReadError, &dev->flags))
clear_bit(R5_Insync, &dev->flags);
if (!test_bit(R5_Insync, &dev->flags)) {
/* Only the first two failed slots are remembered by index;
 * s->failed itself keeps counting past two.
 */
if (s->failed < 2)
s->failed_num[s->failed] = i;
s->failed++;
/* rdev was set to NULL above if it was Faulty at that point.
 * A failed slot that still has a non-Faulty rdev, or failing
 * that a non-Faulty replacement, sets do_recovery.
 */
if (rdev && !test_bit(Faulty, &rdev->flags))
do_recovery = 1;
else if (!rdev) {
rdev = rcu_dereference(
conf->disks[i].replacement);
if (rdev && !test_bit(Faulty, &rdev->flags))
do_recovery = 1;
}
}
/* Journal accounting: just_cached additionally requires dev->written
 * to be set.
 */
if (test_bit(R5_InJournal, &dev->flags))
s->injournal++;
if (test_bit(R5_InJournal, &dev->flags) && dev->written)
s->just_cached++;
}
if (test_bit(STRIPE_SYNCING, &sh->state)) {
/* If there is a failed device being replaced,
* we must be recovering.
* else if we are after recovery_cp, we must be syncing
* else if MD_RECOVERY_REQUESTED is set, we also are syncing.
* else we can only be replacing
* sync and recovery both need to read all devices, and so
* use the same flag.
*/
if (do_recovery ||
sh->sector >= conf->mddev->recovery_cp ||
test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
s->syncing = 1;
else
s->replacing = 1;
}
rcu_read_unlock();
}