in drbd/drbd_worker.c [588:766]
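/*
 * Scan the out-of-sync bitmap and issue up to drbd_rs_number_requests()
 * resync requests to the peer. Throttles itself when the send buffer
 * fills up, and re-arms the resync timer to continue later.
 */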
static int make_resync_request(struct drbd_device *const device, int cancel)
{
struct drbd_peer_device *const peer_device = first_peer_device(device);
struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
unsigned long bit;
sector_t sector;
const sector_t capacity = get_capacity(device->vdisk);
int max_bio_size;
int number, rollback_i, size;
int align, requeue = 0;
int i = 0;
int discard_granularity = 0;
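/* The work was canceled (e.g. the connection is going down); nothing to do. */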
if (unlikely(cancel))
return 0;
if (device->rs_total == 0) {
/* empty resync? */
drbd_resync_finished(device);
return 0;
}
if (!get_ldev(device)) {
/* Since we only need to access the resync bitmap,
 * get_ldev_if_state(device, D_FAILED) would be sufficient; but
 * continuing a resync on a broken disk makes no sense at all. */
drbd_err(device, "Disk broke down during resync!\n");
return 0;
}
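/* If the peer supports thin resync, look up the configured discard
 * granularity; it is used below to send discard-sized requests as
 * P_RS_THIN_REQ instead of P_RS_DATA_REQUEST. */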
if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
rcu_read_lock();
discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
rcu_read_unlock();
}
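/* Upper bound for a single (possibly merged) request, and the number
 * of requests this activation may issue (resync rate pacing). */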
max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
number = drbd_rs_number_requests(device);
if (number <= 0)
goto requeue;
for (i = 0; i < number; i++) {
/* Stop generating RS requests when half of the send buffer is filled,
* but notify TCP that we'd like to have more space. */
mutex_lock(&connection->data.mutex);
if (connection->data.socket) {
struct sock *sk = connection->data.socket->sk;
int queued = sk->sk_wmem_queued;
int sndbuf = sk->sk_sndbuf;
if (queued > sndbuf / 2) {
requeue = 1;
if (sk->sk_socket)
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
}
} else
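/* No data socket: the connection is down; stop generating requests. */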
requeue = 1;
mutex_unlock(&connection->data.mutex);
if (requeue)
goto requeue;
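/* Find the next out-of-sync bit, starting at the current offset.
 * DRBD_END_OF_BITMAP means no bits are left to request. */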
next_sector:
size = BM_BLOCK_SIZE;
bit = drbd_bm_find_next(device, device->bm_resync_fo);
if (bit == DRBD_END_OF_BITMAP) {
device->bm_resync_fo = drbd_bm_bits(device);
put_ldev(device);
return 0;
}
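/* Convert the bit to its start sector and try to lock the
 * corresponding resync extent; if that would block (e.g. because of
 * conflicting application I/O), remember where to resume and retry
 * later. */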
sector = BM_BIT_TO_SECT(bit);
if (drbd_try_rs_begin_io(device, sector)) {
device->bm_resync_fo = bit;
goto requeue;
}
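/* The bit may have been cleared in the meantime, e.g. because an
 * application write to both nodes already put the block in sync; if
 * so, release the resync extent again and rescan. */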
device->bm_resync_fo = bit + 1;
if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
drbd_rs_complete_io(device, sector);
goto next_sector;
}
#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
/* Try to find some adjacent bits.
 * We stop once we have reached the maximum request size.
 *
 * Additionally, always align bigger requests, in order to
 * be prepared for all stripe sizes of software RAIDs.
 */
align = 1;
rollback_i = i;
while (i < number) {
if (size + BM_BLOCK_SIZE > max_bio_size)
break;
/* always stay aligned */
if (sector & ((1<<(align+3))-1))
break;
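/* Stop at exactly the discard granularity, so the request
 * qualifies as a P_RS_THIN_REQ below. */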
if (discard_granularity && size == discard_granularity)
break;
/* do not cross extent boundaries */
if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
break;
/* Now, is it actually dirty, after all?
 * Caution: drbd_bm_test_bit() is tri-state for some
 * obscure reason; checking (b == 0) here would handle its
 * out-of-bounds result only accidentally right, because of
 * the "oddly sized" adjustment below. */
if (drbd_bm_test_bit(device, bit+1) != 1)
break;
bit++;
size += BM_BLOCK_SIZE;
if ((BM_BLOCK_SIZE << align) <= size)
align++;
i++;
}
/* If we merged some,
 * advance the offset from which the next drbd_bm_find_next() starts. */
if (size > BM_BLOCK_SIZE)
device->bm_resync_fo = bit + 1;
#endif
/* adjust the very last request, in case the device size is not a multiple of BM_BLOCK_SIZE */
if (sector + (size>>9) > capacity)
size = (capacity-sector)<<9;
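/* With checksum-based resync, the block is read locally first and
 * data is transferred only if the peer's checksum differs.
 * Otherwise, ask the peer for the data (or, for thin requests, its
 * allocation status) right away. */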
if (device->use_csums) {
switch (read_for_csum(peer_device, sector, size)) {
case -EIO: /* Disk failure */
put_ldev(device);
return -EIO;
case -EAGAIN: /* allocation failed, or ldev busy */
drbd_rs_complete_io(device, sector);
device->bm_resync_fo = BM_SECT_TO_BIT(sector);
i = rollback_i;
goto requeue;
case 0:
/* everything ok */
break;
default:
BUG();
}
} else {
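/* Account for the reply we now expect, then request the data. At
 * discard granularity, send a thin request instead; the peer may
 * answer that the range is deallocated. */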
int err;
inc_rs_pending(device);
err = drbd_send_drequest(peer_device,
size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
sector, size, ID_SYNCER);
if (err) {
drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
dec_rs_pending(device);
put_ldev(device);
return err;
}
}
}
if (device->bm_resync_fo >= drbd_bm_bits(device)) {
/* The last syncer _request_ was sent, but the
 * P_RS_DATA_REPLY has not yet been received. Sync will end (and
 * the next sync group will resume) as soon as we receive the
 * last resync data block and the last bit is cleared.
 * Until then, resync "work" is "inactive" ...
 */
put_ldev(device);
return 0;
}
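/* Account the sectors now in flight (each i covers one bitmap slot
 * of BM_BLOCK_SIZE) and re-arm the resync timer to continue later. */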
requeue:
device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
put_ldev(device);
return 0;
}