in raid5.c [5773:5956]
/*
 * raid5_make_request - attach a bio to the stripe_heads that cover it.
 * @mddev: array the bio is addressed to; mddev->private is its r5conf.
 * @bi:    bio to process.  It may be replaced locally by the value returned
 *         from chunk_aligned_read() and may be updated by md_account_bio().
 *
 * After the special cases (PREFLUSH, chunk-aligned read, discard, NOWAIT
 * during reshape) the bio's sector range is walked in steps of
 * RAID5_STRIPE_SECTORS(conf).  Each step computes the target sector with
 * raid5_compute_sector(), takes a stripe_head with raid5_get_active_stripe()
 * and attaches the bio with add_stripe_bio().  Whenever a step cannot make
 * progress (reshape window, stale 'previous' decision, STRIPE_EXPANDING,
 * add_stripe_bio() failure) it sleeps on conf->wait_for_overlap and retries
 * the same logical_sector.
 *
 * Return: false only when md_write_start() fails.  Every other path returns
 * true, including the paths that finish the bio with an error status.
 */
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
	struct r5conf *conf = mddev->private;
	int dd_idx;
	sector_t new_sector;
	sector_t logical_sector, last_sector;
	struct stripe_head *sh;
	const int rw = bio_data_dir(bi);
	DEFINE_WAIT(w);
	bool do_prepare;
	bool do_flush = false;

	if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
		int ret = log_handle_flush_request(conf, bi);

		/* 0: nothing more to do here (presumably the log took the bio) */
		if (ret == 0)
			return true;
		if (ret == -ENODEV) {
			if (md_flush_request(mddev, bi))
				return true;
		}
		/* ret == -EAGAIN, fallback */
		/*
		 * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
		 * we need to flush journal device
		 */
		do_flush = bi->bi_opf & REQ_PREFLUSH;
	}

	/*
	 * The only path that returns false.  Note md_write_start() is called
	 * for every bio while md_write_end() below is guarded by rw == WRITE;
	 * presumably md_write_start() does nothing for reads - confirm.
	 */
	if (!md_write_start(mddev, bi))
		return false;
	/*
	 * If array is degraded, better not do chunk aligned read because
	 * later we might have to read it again in order to reconstruct
	 * data on failed drives.
	 */
	if (rw == READ && mddev->degraded == 0 &&
	    mddev->reshape_position == MaxSector) {
		bi = chunk_aligned_read(mddev, bi);
		/* NULL: nothing left for the stripe path; else carry on with bi */
		if (!bi)
			return true;
	}

	if (unlikely(bio_op(bi) == REQ_OP_DISCARD)) {
		make_discard_request(mddev, bi);
		/* unconditional here, unlike the rw == WRITE guards below */
		md_write_end(mddev);
		return true;
	}

	/*
	 * Round the start down to a multiple of RAID5_STRIPE_SECTORS(conf);
	 * the mask form assumes that value is a power of two.
	 */
	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
	last_sector = bio_end_sector(bi);
	bi->bi_next = NULL;

	/*
	 * Bail out if conflicts with reshape and REQ_NOWAIT is set.
	 *
	 * The window tested here has to be exactly the one that makes the
	 * loop below call schedule(): the sector is not ahead of
	 * reshape_progress (so 'previous' stays 0) but is still ahead of
	 * reshape_safe.  For a backwards reshape that is
	 * reshape_progress <= sector < reshape_safe; for a forwards reshape
	 * it is reshape_safe <= sector < reshape_progress.
	 *
	 * NOTE(review): only the first stripe of the bio is tested, so a later
	 * stripe of a NOWAIT bio can still sleep in the loop.
	 * NOTE(review): reshape_progress/reshape_safe are read here without
	 * conf->device_lock, although the loop takes it to avoid seeing a
	 * half-updated 64bit value on 32bit platforms.
	 */
	if ((bi->bi_opf & REQ_NOWAIT) &&
	    (conf->reshape_progress != MaxSector) &&
	    (mddev->reshape_backwards
	    ? (logical_sector >= conf->reshape_progress && logical_sector < conf->reshape_safe)
	    : (logical_sector >= conf->reshape_safe && logical_sector < conf->reshape_progress))) {
		bio_wouldblock_error(bi);
		if (rw == WRITE)
			md_write_end(mddev);
		return true;
	}
	md_account_bio(mddev, &bi);
	/* Queue on wait_for_overlap before the first attempt that may sleep */
	prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
	for (; logical_sector < last_sector; logical_sector += RAID5_STRIPE_SECTORS(conf)) {
		int previous;
		int seq;

		do_prepare = false;
	retry:
		/*
		 * Every attempt starts from scratch: sample gen_lock so that a
		 * change during the attempt is detected after the stripe has
		 * been obtained, and reset 'previous'.
		 */
		seq = read_seqcount_begin(&conf->gen_lock);
		previous = 0;
		/* Set by the retry paths that slept: queue on the waitqueue again */
		if (do_prepare)
			prepare_to_wait(&conf->wait_for_overlap, &w,
				TASK_UNINTERRUPTIBLE);
		if (unlikely(conf->reshape_progress != MaxSector)) {
			/* spinlock is needed as reshape_progress may be
			 * 64bit on a 32bit platform, and so it might be
			 * possible to see a half-updated value
			 * Of course reshape_progress could change after
			 * the lock is dropped, so once we get a reference
			 * to the stripe that we think it is, we will have
			 * to check again.
			 */
			spin_lock_irq(&conf->device_lock);
			if (mddev->reshape_backwards
			    ? logical_sector < conf->reshape_progress
			    : logical_sector >= conf->reshape_progress) {
				/* reshape has not reached this sector yet */
				previous = 1;
			} else {
				if (mddev->reshape_backwards
				    ? logical_sector < conf->reshape_safe
				    : logical_sector >= conf->reshape_safe) {
					/*
					 * Between reshape_safe and
					 * reshape_progress: sleep and
					 * re-evaluate.
					 */
					spin_unlock_irq(&conf->device_lock);
					schedule();
					do_prepare = true;
					goto retry;
				}
			}
			spin_unlock_irq(&conf->device_lock);
		}

		new_sector = raid5_compute_sector(conf, logical_sector,
						  previous,
						  &dd_idx, NULL);
		pr_debug("raid456: raid5_make_request, sector %llu logical %llu\n",
			(unsigned long long)new_sector,
			(unsigned long long)logical_sector);

		sh = raid5_get_active_stripe(conf, new_sector, previous,
				       (bi->bi_opf & REQ_RAHEAD), 0);
		if (sh) {
			if (unlikely(previous)) {
				/* expansion might have moved on while waiting for a
				 * stripe, so we must do the range check again.
				 * Expansion could still move past after this
				 * test, but as we are holding a reference to
				 * 'sh', we know that if that happens,
				 * STRIPE_EXPANDING will get set and the expansion
				 * won't proceed until we finish with the stripe.
				 */
				int must_retry = 0;
				spin_lock_irq(&conf->device_lock);
				if (mddev->reshape_backwards
				    ? logical_sector >= conf->reshape_progress
				    : logical_sector < conf->reshape_progress)
					/* mismatch, need to try again */
					must_retry = 1;
				spin_unlock_irq(&conf->device_lock);
				if (must_retry) {
					raid5_release_stripe(sh);
					schedule();
					do_prepare = true;
					goto retry;
				}
			}
			if (read_seqcount_retry(&conf->gen_lock, seq)) {
				/* Might have got the wrong stripe_head
				 * by accident
				 */
				/* retried at once: no schedule() on this path */
				raid5_release_stripe(sh);
				goto retry;
			}

			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
				/* Stripe is busy expanding or
				 * add failed due to overlap. Flush everything
				 * and wait a while
				 */
				md_wakeup_thread(mddev->thread);
				raid5_release_stripe(sh);
				schedule();
				do_prepare = true;
				goto retry;
			}
			if (do_flush) {
				set_bit(STRIPE_R5C_PREFLUSH, &sh->state);
				/* we only need flush for one stripe */
				do_flush = false;
			}

			set_bit(STRIPE_HANDLE, &sh->state);
			clear_bit(STRIPE_DELAYED, &sh->state);
			/*
			 * For a REQ_SYNC bio, on a stripe that has no batch head
			 * or is its own batch head, set STRIPE_PREREAD_ACTIVE and
			 * count the stripe once in preread_active_stripes.
			 */
			if ((!sh->batch_head || sh == sh->batch_head) &&
			    (bi->bi_opf & REQ_SYNC) &&
			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
				atomic_inc(&conf->preread_active_stripes);
			release_stripe_plug(mddev, sh);
		} else {
			/* cannot get stripe for read-ahead, just give-up */
			bi->bi_status = BLK_STS_IOERR;
			break;
		}
	}
	finish_wait(&conf->wait_for_overlap, &w);

	if (rw == WRITE)
		md_write_end(mddev);
	/*
	 * NOTE(review): stripes may still have this bio attached at this
	 * point; presumably this call only ends this function's own hold on
	 * it - confirm against add_stripe_bio().
	 */
	bio_endio(bi);
	return true;
}