in drbd/drbd_state.c [1701:2026]
static void after_state_ch(struct drbd_device *device, union drbd_state os,
union drbd_state ns, enum chg_state_flags flags,
struct drbd_state_change *state_change)
{
struct drbd_resource *resource = device->resource;
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
struct sib_info sib;
broadcast_state_change(state_change);
sib.sib_reason = SIB_STATE_CHANGE;
sib.os = os;
sib.ns = ns;
if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
&& (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
clear_bit(CRASHED_PRIMARY, &device->flags);
if (device->p_uuid)
device->p_uuid[UI_FLAGS] &= ~((u64)2);
}
/* Inform userspace about the change... */
drbd_bcast_event(device, &sib);
if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
(ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
drbd_khelper(device, "pri-on-incon-degr");
/* Here we have the actions that are performed after a
state change. This function might sleep */
if (ns.susp_nod) {
enum drbd_req_event what = NOTHING;
spin_lock_irq(&device->resource->req_lock);
if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED)
what = RESEND;
if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
conn_lowest_disk(connection) == D_UP_TO_DATE)
what = RESTART_FROZEN_DISK_IO;
if (resource->susp_nod && what != NOTHING) {
_tl_restart(connection, what);
_conn_request_state(connection,
(union drbd_state) { { .susp_nod = 1 } },
(union drbd_state) { { .susp_nod = 0 } },
CS_VERBOSE);
}
spin_unlock_irq(&device->resource->req_lock);
}
if (ns.susp_fen) {
spin_lock_irq(&device->resource->req_lock);
if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
/* case2: The connection was established again: */
struct drbd_peer_device *peer_device;
int vnr;
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
clear_bit(NEW_CUR_UUID, &peer_device->device->flags);
rcu_read_unlock();
/* We should actively create a new uuid, _before_
* we resume/resent, if the peer is diskless
* (recovery from a multiple error scenario).
* Currently, this happens with a slight delay
* below when checking lost_contact_to_peer_data() ...
*/
_tl_restart(connection, RESEND);
_conn_request_state(connection,
(union drbd_state) { { .susp_fen = 1 } },
(union drbd_state) { { .susp_fen = 0 } },
CS_VERBOSE);
}
spin_unlock_irq(&device->resource->req_lock);
}
/* Became sync source. With protocol >= 96, we still need to send out
* the sync uuid now. Need to do that before any drbd_send_state, or
* the other side may go "paused sync" before receiving the sync uuids,
* which is unexpected. */
if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
(ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
connection->agreed_pro_version >= 96 && get_ldev(device)) {
drbd_gen_and_send_sync_uuid(peer_device);
put_ldev(device);
}
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS &&
ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) { /* attach on the peer */
/* we probably will start a resync soon.
* make sure those things are properly reset. */
device->rs_total = 0;
device->rs_failed = 0;
atomic_set(&device->rs_pending_cnt, 0);
drbd_rs_cancel_all(device);
drbd_send_uuids(peer_device);
drbd_send_state(peer_device, ns);
}
/* No point in queuing send_bitmap if we don't have a connection
* anymore, so check also the _current_ state, not only the new state
* at the time this work was queued. */
if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
device->state.conn == C_WF_BITMAP_S)
drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
"send_bitmap (WFBitMapS)",
BM_LOCKED_TEST_ALLOWED);
/* Lost contact to peer's copy of the data */
if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
if (get_ldev(device)) {
if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
if (drbd_suspended(device)) {
set_bit(NEW_CUR_UUID, &device->flags);
} else {
drbd_uuid_new_current(device);
drbd_send_uuids(peer_device);
}
}
put_ldev(device);
}
}
if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(device);
drbd_send_uuids(peer_device);
}
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
/* We may still be Primary ourselves.
* No harm done if the bitmap still changes,
* redirtied pages will follow later. */
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
"demote diskless peer", BM_LOCKED_SET_ALLOWED);
put_ldev(device);
}
/* Write out all changed bits on demote.
* Though, no need to da that just yet
* if there is a resync going on still */
if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
device->state.conn <= C_CONNECTED && get_ldev(device)) {
/* No changes to the bitmap expected this time, so assert that,
* even though no harm was done if it did change. */
drbd_bitmap_io_from_worker(device, &drbd_bm_write,
"demote", BM_LOCKED_TEST_ALLOWED);
put_ldev(device);
}
/* Last part of the attaching process ... */
if (ns.conn >= C_CONNECTED &&
os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
drbd_send_sizes(peer_device, 0, 0); /* to start sync... */
drbd_send_uuids(peer_device);
drbd_send_state(peer_device, ns);
}
/* We want to pause/continue resync, tell peer. */
if (ns.conn >= C_CONNECTED &&
((os.aftr_isp != ns.aftr_isp) ||
(os.user_isp != ns.user_isp)))
drbd_send_state(peer_device, ns);
/* In case one of the isp bits got set, suspend other devices. */
if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
(ns.aftr_isp || ns.peer_isp || ns.user_isp))
suspend_other_sg(device);
/* Make sure the peer gets informed about eventual state
changes (ISP bits) while we were in WFReportParams. */
if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
drbd_send_state(peer_device, ns);
if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
drbd_send_state(peer_device, ns);
/* We are in the progress to start a full sync... */
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
/* no other bitmap changes expected during this phase */
drbd_queue_bitmap_io(device,
&drbd_bmio_set_n_write, &abw_start_sync,
"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
/* corresponding get_ldev was in _drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
if (device->ldev) {
rcu_read_lock();
eh = rcu_dereference(device->ldev->disk_conf)->on_io_error;
rcu_read_unlock();
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
/* Intentionally call this handler first, before drbd_send_state().
* See: 2932204 drbd: call local-io-error handler early
* People may chose to hard-reset the box from this handler.
* It is useful if this looks like a "regular node crash". */
if (was_io_error && eh == EP_CALL_HELPER)
drbd_khelper(device, "local-io-error");
/* Immediately allow completion of all application IO,
* that waits for completion from the local disk,
* if this was a force-detach due to disk_timeout
* or administrator request (drbdsetup detach --force).
* Do NOT abort otherwise.
* Aborting local requests may cause serious problems,
* if requests are completed to upper layers already,
* and then later the already submitted local bio completes.
* This can cause DMA into former bio pages that meanwhile
* have been re-used for other things.
* So aborting local requests may cause crashes,
* or even worse, silent data corruption.
*/
if (test_and_clear_bit(FORCE_DETACH, &device->flags))
tl_abort_disk_io(device);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (device->state.disk != D_FAILED)
drbd_err(device,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
drbd_send_state(peer_device, ns);
drbd_rs_cancel_all(device);
/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(device);
}
put_ldev(device);
}
/* second half of local IO error, failure to attach,
* or administrative detach,
* after local_cnt references have reached zero again */
if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
/* We must still be diskless,
* re-attach has to be serialized with this! */
if (device->state.disk != D_DISKLESS)
drbd_err(device,
"ASSERT FAILED: disk is %s while going diskless\n",
drbd_disk_str(device->state.disk));
if (ns.conn >= C_CONNECTED)
drbd_send_state(peer_device, ns);
/* corresponding get_ldev in __drbd_set_state
* this may finally trigger drbd_ldev_destroy. */
put_ldev(device);
}
/* Notify peer that I had a local IO error, and did not detached.. */
if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
drbd_send_state(peer_device, ns);
/* Disks got bigger while they were detached */
if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
test_and_clear_bit(RESYNC_AFTER_NEG, &device->flags)) {
if (ns.conn == C_CONNECTED)
resync_after_online_grow(device);
}
/* A resync finished or aborted, wake paused devices... */
if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
(os.peer_isp && !ns.peer_isp) ||
(os.user_isp && !ns.user_isp))
resume_next_sg(device);
/* sync target done with resync. Explicitly notify peer, even though
* it should (at least for non-empty resyncs) already know itself. */
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
drbd_send_state(peer_device, ns);
/* Verify finished, or reached stop sector. Peer did not know about
* the stop sector, and we may even have changed the stop sector during
* verify to interrupt/stop early. Send the new state. */
if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
&& verify_can_do_stop_sector(device))
drbd_send_state(peer_device, ns);
/* This triggers bitmap writeout of potentially still unwritten pages
* if the resync finished cleanly, or aborted because of peer disk
* failure, or on transition from resync back to AHEAD/BEHIND.
*
* Connection loss is handled in drbd_disconnected() by the receiver.
*
* For resync aborted because of local disk failure, we cannot do
* any bitmap writeout anymore.
*
* No harm done if some bits change during this phase.
*/
if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
(ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
put_ldev(device);
}
if (ns.disk == D_DISKLESS &&
ns.conn == C_STANDALONE &&
ns.role == R_SECONDARY) {
if (os.aftr_isp != ns.aftr_isp)
resume_next_sg(device);
}
drbd_md_sync(device);
}