in raid10.c [1366:1527]
static void raid10_write_request(struct mddev *mddev, struct bio *bio,
struct r10bio *r10_bio)
{
struct r10conf *conf = mddev->private;
int i;
sector_t sectors;
int max_sectors;
if ((mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector,
bio_end_sector(bio)))) {
DEFINE_WAIT(w);
/* Bail out if REQ_NOWAIT is set for the bio */
if (bio->bi_opf & REQ_NOWAIT) {
bio_wouldblock_error(bio);
return;
}
for (;;) {
prepare_to_wait(&conf->wait_barrier,
&w, TASK_IDLE);
if (!md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio)))
break;
schedule();
}
finish_wait(&conf->wait_barrier, &w);
}
sectors = r10_bio->sectors;
if (!regular_request_wait(mddev, conf, bio, sectors))
return;
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
(mddev->reshape_backwards
? (bio->bi_iter.bi_sector < conf->reshape_safe &&
bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
: (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
bio->bi_iter.bi_sector < conf->reshape_progress))) {
/* Need to update reshape_position in metadata */
mddev->reshape_position = conf->reshape_progress;
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
md_wakeup_thread(mddev->thread);
if (bio->bi_opf & REQ_NOWAIT) {
allow_barrier(conf);
bio_wouldblock_error(bio);
return;
}
raid10_log(conf->mddev, "wait reshape metadata");
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
conf->reshape_safe = mddev->reshape_position;
}
/* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
* If there are known/acknowledged bad blocks on any device
* on which we have seen a write error, we want to avoid
* writing to those blocks. This potentially requires several
* writes to write around the bad blocks. Each set of writes
* gets its own r10_bio with a set of bios attached.
*/
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
wait_blocked_dev(mddev, r10_bio);
rcu_read_lock();
max_sectors = r10_bio->sectors;
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
struct md_rdev *rrdev = rcu_dereference(
conf->mirrors[d].replacement);
if (rdev == rrdev)
rrdev = NULL;
if (rdev && (test_bit(Faulty, &rdev->flags)))
rdev = NULL;
if (rrdev && (test_bit(Faulty, &rrdev->flags)))
rrdev = NULL;
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].repl_bio = NULL;
if (!rdev && !rrdev) {
set_bit(R10BIO_Degraded, &r10_bio->state);
continue;
}
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
int bad_sectors;
int is_bad;
is_bad = is_badblock(rdev, dev_sector, max_sectors,
&first_bad, &bad_sectors);
if (is_bad && first_bad <= dev_sector) {
/* Cannot write here at all */
bad_sectors -= (dev_sector - first_bad);
if (bad_sectors < max_sectors)
/* Mustn't write more than bad_sectors
* to other devices yet
*/
max_sectors = bad_sectors;
/* We don't set R10BIO_Degraded as that
* only applies if the disk is missing,
* so it might be re-added, and we want to
* know to recover this chunk.
* In this case the device is here, and the
* fact that this chunk is not in-sync is
* recorded in the bad block log.
*/
continue;
}
if (is_bad) {
int good_sectors = first_bad - dev_sector;
if (good_sectors < max_sectors)
max_sectors = good_sectors;
}
}
if (rdev) {
r10_bio->devs[i].bio = bio;
atomic_inc(&rdev->nr_pending);
}
if (rrdev) {
r10_bio->devs[i].repl_bio = bio;
atomic_inc(&rrdev->nr_pending);
}
}
rcu_read_unlock();
if (max_sectors < r10_bio->sectors)
r10_bio->sectors = max_sectors;
if (r10_bio->sectors < bio_sectors(bio)) {
struct bio *split = bio_split(bio, r10_bio->sectors,
GFP_NOIO, &conf->bio_split);
bio_chain(split, bio);
allow_barrier(conf);
submit_bio_noacct(bio);
wait_barrier(conf, false);
bio = split;
r10_bio->master_bio = bio;
}
if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue))
r10_bio->start_time = bio_start_io_acct(bio);
atomic_set(&r10_bio->remaining, 1);
md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
for (i = 0; i < conf->copies; i++) {
if (r10_bio->devs[i].bio)
raid10_write_one_disk(mddev, r10_bio, bio, false, i);
if (r10_bio->devs[i].repl_bio)
raid10_write_one_disk(mddev, r10_bio, bio, true, i);
}
one_write_done(r10_bio);
}