static int raid10_handle_discard()

in raid10.c [1611:1858]


static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	struct geom *geo = &conf->geo;
	int far_copies = geo->far_copies;
	bool first_copy = true;
	struct r10bio *r10_bio, *first_r10bio;
	struct bio *split;
	int disk;
	sector_t chunk;
	unsigned int stripe_size;
	unsigned int stripe_data_disks;
	sector_t split_size;
	sector_t bio_start, bio_end;
	sector_t first_stripe_index, last_stripe_index;
	sector_t start_disk_offset;
	unsigned int start_disk_index;
	sector_t end_disk_offset;
	unsigned int end_disk_index;
	unsigned int remainder;

	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		return -EAGAIN;

	if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
		bio_wouldblock_error(bio);
		return 0;
	}
	wait_barrier(conf, false);

	/*
	 * Check reshape again to avoid a reshape that starts after the
	 * MD_RECOVERY_RESHAPE test above but before wait_barrier() returns.
	 */
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
		goto out;

	if (geo->near_copies)
		stripe_data_disks = geo->raid_disks / geo->near_copies +
					geo->raid_disks % geo->near_copies;
	else
		stripe_data_disks = geo->raid_disks;

	stripe_size = stripe_data_disks << geo->chunk_shift;
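	/*
	 * Worked example (hypothetical geometry, not from the source): a
	 * near-2 layout on 4 disks with 512KiB (1024-sector) chunks, i.e.
	 * chunk_shift = 10:
	 *   stripe_data_disks = 4 / 2 + 4 % 2 = 2
	 *   stripe_size       = 2 << 10 = 2048 sectors
	 * so one stripe holds 1MiB of user data across the array.
	 */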

	bio_start = bio->bi_iter.bi_sector;
	bio_end = bio_end_sector(bio);

	/*
	 * A discard bio may be smaller than the stripe size, or it may cross
	 * a stripe boundary while the discard region is larger than one
	 * stripe. For the far offset layout, if the discard region is not
	 * aligned to the stripe size, holes are left when the discard bios
	 * are submitted to the member disks. For simplicity, only handle
	 * discard bios whose region is bigger than stripe_size * 2.
	 */
	if (bio_sectors(bio) < stripe_size * 2)
		goto out;

	/*
	 * Keep the bio aligned to the stripe size.
	 */
	div_u64_rem(bio_start, stripe_size, &remainder);
	if (remainder) {
		split_size = stripe_size - remainder;
		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
		bio_chain(split, bio);
		allow_barrier(conf);
		/* Resend the first split part */
		submit_bio_noacct(split);
		wait_barrier(conf, false);
	}
	div_u64_rem(bio_end, stripe_size, &remainder);
	if (remainder) {
		split_size = bio_sectors(bio) - remainder;
		split = bio_split(bio, split_size, GFP_NOIO, &conf->bio_split);
		bio_chain(split, bio);
		allow_barrier(conf);
		/* Resend the second split part (the unaligned tail) */
		submit_bio_noacct(bio);
		bio = split;
		wait_barrier(conf, false);
	}
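	/*
	 * Example with hypothetical numbers: assuming stripe_size = 2048 and
	 * a discard covering sectors [1000, 9000), the first split resubmits
	 * [1000, 2048) as an ordinary bio, the second split resubmits the
	 * tail [8192, 9000), and this function continues with the
	 * stripe-aligned middle [2048, 8192).
	 */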

	bio_start = bio->bi_iter.bi_sector;
	bio_end = bio_end_sector(bio);

	/*
	 * RAID10 uses the chunk as its unit for storing data, similar to
	 * raid0. One stripe contains the chunks from all member disks (one
	 * chunk from each disk at the same HBA address). For layout details,
	 * see 'man 4 md'.
	 */
	chunk = bio_start >> geo->chunk_shift;
	chunk *= geo->near_copies;
	first_stripe_index = chunk;
	start_disk_index = sector_div(first_stripe_index, geo->raid_disks);
	if (geo->far_offset)
		first_stripe_index *= geo->far_copies;
	start_disk_offset = (bio_start & geo->chunk_mask) +
				(first_stripe_index << geo->chunk_shift);

	chunk = bio_end >> geo->chunk_shift;
	chunk *= geo->near_copies;
	last_stripe_index = chunk;
	end_disk_index = sector_div(last_stripe_index, geo->raid_disks);
	if (geo->far_offset)
		last_stripe_index *= geo->far_copies;
	end_disk_offset = (bio_end & geo->chunk_mask) +
				(last_stripe_index << geo->chunk_shift);
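	/*
	 * Continuing the hypothetical near-2/4-disk example with 1024-sector
	 * chunks (chunk_mask = 1023) and bio_start = 2048:
	 *   chunk = (2048 >> 10) * near_copies = 4
	 *   start_disk_index  = 4 % 4 = 0, first_stripe_index = 4 / 4 = 1
	 *   start_disk_offset = (2048 & 1023) + (1 << 10) = 1024
	 * so the discard begins at the second chunk (device offset 1024) of
	 * disk 0.
	 */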

retry_discard:
	r10_bio = mempool_alloc(&conf->r10bio_pool, GFP_NOIO);
	r10_bio->mddev = mddev;
	r10_bio->state = 0;
	r10_bio->sectors = 0;
	memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * geo->raid_disks);
	wait_blocked_dev(mddev, r10_bio);

	/*
	 * The far layout needs more than one r10bio to cover all the
	 * regions. Inspired by raid10_sync_request, the first
	 * r10bio->master_bio records the discard bio itself, while every
	 * other r10bio->master_bio records the first r10bio. The first
	 * r10bio is only released after all the other r10bios finish, and
	 * the discard bio completes only when the first r10bio finishes.
	 */
	if (first_copy) {
		r10_bio->master_bio = bio;
		set_bit(R10BIO_Discard, &r10_bio->state);
		first_copy = false;
		first_r10bio = r10_bio;
	} else
		r10_bio->master_bio = (struct bio *)first_r10bio;

	/*
	 * First select the target devices under the RCU read lock and take
	 * a reference on their rdev by incrementing nr_pending. Record them
	 * by setting bios[x] to bio.
	 */
	rcu_read_lock();
	for (disk = 0; disk < geo->raid_disks; disk++) {
		struct md_rdev *rdev = rcu_dereference(conf->mirrors[disk].rdev);
		struct md_rdev *rrdev = rcu_dereference(
			conf->mirrors[disk].replacement);

		r10_bio->devs[disk].bio = NULL;
		r10_bio->devs[disk].repl_bio = NULL;

		if (rdev && (test_bit(Faulty, &rdev->flags)))
			rdev = NULL;
		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
			rrdev = NULL;
		if (!rdev && !rrdev)
			continue;

		if (rdev) {
			r10_bio->devs[disk].bio = bio;
			atomic_inc(&rdev->nr_pending);
		}
		if (rrdev) {
			r10_bio->devs[disk].repl_bio = bio;
			atomic_inc(&rrdev->nr_pending);
		}
	}
	rcu_read_unlock();

	atomic_set(&r10_bio->remaining, 1);
	for (disk = 0; disk < geo->raid_disks; disk++) {
		sector_t dev_start, dev_end;
		struct bio *mbio, *rbio = NULL;

		/*
		 * Now calculate the start and end address for each disk.
		 * The space between dev_start and dev_end is the discard
		 * region on that disk.
		 *
		 * dev_start needs to consider three cases:
		 * 1st, the disk is before start_disk_index: think of the
		 * disk as already being in the next stripe, so dev_start is
		 * the start address of the next stripe.
		 * 2nd, the disk is after start_disk_index: the disk is in
		 * the same stripe as the first disk.
		 * 3rd, the first disk itself: use start_disk_offset
		 * directly.
		 */
		if (disk < start_disk_index)
			dev_start = (first_stripe_index + 1) * mddev->chunk_sectors;
		else if (disk > start_disk_index)
			dev_start = first_stripe_index * mddev->chunk_sectors;
		else
			dev_start = start_disk_offset;

		if (disk < end_disk_index)
			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
		else if (disk > end_disk_index)
			dev_end = last_stripe_index * mddev->chunk_sectors;
		else
			dev_end = end_disk_offset;
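		/*
		 * In the running example (bio now [2048, 8192)): bio_end =
		 * 8192 gives last_stripe_index = 4, end_disk_index = 0 and
		 * end_disk_offset = 4096, so every disk discards the device
		 * range [1024, 4096). That is 3072 sectors on each of the 4
		 * disks, i.e. 6144 user sectors stored twice (near-2).
		 */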

		/*
		 * Only discard bios at least one stripe in size are handled,
		 * so dev_end > dev_start always holds. There is no need to
		 * take the RCU lock to get the rdev here: nr_pending was
		 * already incremented in the first loop.
		 */
		if (r10_bio->devs[disk].bio) {
			struct md_rdev *rdev = conf->mirrors[disk].rdev;
			mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
			mbio->bi_end_io = raid10_end_discard_request;
			mbio->bi_private = r10_bio;
			r10_bio->devs[disk].bio = mbio;
			r10_bio->devs[disk].devnum = disk;
			atomic_inc(&r10_bio->remaining);
			md_submit_discard_bio(mddev, rdev, mbio,
					dev_start + choose_data_offset(r10_bio, rdev),
					dev_end - dev_start);
			bio_endio(mbio);
		}
		if (r10_bio->devs[disk].repl_bio) {
			struct md_rdev *rrdev = conf->mirrors[disk].replacement;
			rbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
			rbio->bi_end_io = raid10_end_discard_request;
			rbio->bi_private = r10_bio;
			r10_bio->devs[disk].repl_bio = rbio;
			r10_bio->devs[disk].devnum = disk;
			atomic_inc(&r10_bio->remaining);
			md_submit_discard_bio(mddev, rrdev, rbio,
					dev_start + choose_data_offset(r10_bio, rrdev),
					dev_end - dev_start);
			bio_endio(rbio);
		}
	}

	if (!geo->far_offset && --far_copies) {
		first_stripe_index += geo->stride >> geo->chunk_shift;
		start_disk_offset += geo->stride;
		last_stripe_index += geo->stride >> geo->chunk_shift;
		end_disk_offset += geo->stride;
		atomic_inc(&first_r10bio->remaining);
		raid_end_discard_bio(r10_bio);
		wait_barrier(conf, false);
		goto retry_discard;
	}
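	/*
	 * Sketch of the far (non-offset) case, under assumed geometry: with
	 * far_copies = 2 each device is divided into two zones of
	 * geo->stride sectors, the second holding a rotated copy of the
	 * data in the first. The branch above shifts the stripe indices and
	 * disk offsets by one stride and jumps back to retry_discard, so
	 * the second zone is discarded by a further r10bio chained to the
	 * first via master_bio.
	 */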

	raid_end_discard_bio(r10_bio);

	return 0;
out:
	allow_barrier(conf);
	return -EAGAIN;
}