static void after_state_ch()

in drbd/drbd_state.c [1701:2026]


static void after_state_ch(struct drbd_device *device, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags,
			   struct drbd_state_change *state_change)
{
	struct drbd_resource *resource = device->resource;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	struct sib_info sib;

	broadcast_state_change(state_change);

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
	&&  (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
		clear_bit(CRASHED_PRIMARY, &device->flags);
		if (device->p_uuid)
			device->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	/* Inform userspace about the change... */
	drbd_bcast_event(device, &sib);

	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(device, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	if (ns.susp_nod) {
		enum drbd_req_event what = NOTHING;

		spin_lock_irq(&device->resource->req_lock);
		if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED)
			what = RESEND;

		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
		    conn_lowest_disk(connection) == D_UP_TO_DATE)
			what = RESTART_FROZEN_DISK_IO;

		if (resource->susp_nod && what != NOTHING) {
			_tl_restart(connection, what);
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_nod = 1 } },
					    (union drbd_state) { { .susp_nod = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&device->resource->req_lock);
	}

	if (ns.susp_fen) {
		spin_lock_irq(&device->resource->req_lock);
		if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
			/* case2: The connection was established again: */
			struct drbd_peer_device *peer_device;
			int vnr;

			rcu_read_lock();
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
				clear_bit(NEW_CUR_UUID, &peer_device->device->flags);
			rcu_read_unlock();

			/* We should actively create a new uuid, _before_
			 * we resume/resent, if the peer is diskless
			 * (recovery from a multiple error scenario).
			 * Currently, this happens with a slight delay
			 * below when checking lost_contact_to_peer_data() ...
			 */
			_tl_restart(connection, RESEND);
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&device->resource->req_lock);
	}

	/* Became sync source.  With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    connection->agreed_pro_version >= 96 && get_ldev(device)) {
		drbd_gen_and_send_sync_uuid(peer_device);
		put_ldev(device);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS &&
	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
		/* we probably will start a resync soon.
		 * make sure those things are properly reset. */
		device->rs_total = 0;
		device->rs_failed = 0;
		atomic_set(&device->rs_pending_cnt, 0);
		drbd_rs_cancel_all(device);

		drbd_send_uuids(peer_device);
		drbd_send_state(peer_device, ns);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    device->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
		if (get_ldev(device)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (drbd_suspended(device)) {
					set_bit(NEW_CUR_UUID, &device->flags);
				} else {
					drbd_uuid_new_current(device);
					drbd_send_uuids(peer_device);
				}
			}
			put_ldev(device);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
		if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
		    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			drbd_uuid_new_current(device);
			drbd_send_uuids(peer_device);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(device, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(device);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to da that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
		device->state.conn <= C_CONNECTED && get_ldev(device)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(device, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(device);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(peer_device, 0, 0);  /* to start sync... */
		drbd_send_uuids(peer_device);
		drbd_send_state(peer_device, ns);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(peer_device, ns);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(device);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(peer_device, ns);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(peer_device, ns);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(device,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh = EP_PASS_ON;
		int was_io_error = 0;
		/* corresponding get_ldev was in _drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS.
		 * But is is still not save to dreference ldev here, since
		 * we might come from an failed Attach before ldev was set. */
		if (device->ldev) {
			rcu_read_lock();
			eh = rcu_dereference(device->ldev->disk_conf)->on_io_error;
			rcu_read_unlock();

			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);

			/* Intentionally call this handler first, before drbd_send_state().
			 * See: 2932204 drbd: call local-io-error handler early
			 * People may chose to hard-reset the box from this handler.
			 * It is useful if this looks like a "regular node crash". */
			if (was_io_error && eh == EP_CALL_HELPER)
				drbd_khelper(device, "local-io-error");

			/* Immediately allow completion of all application IO,
			 * that waits for completion from the local disk,
			 * if this was a force-detach due to disk_timeout
			 * or administrator request (drbdsetup detach --force).
			 * Do NOT abort otherwise.
			 * Aborting local requests may cause serious problems,
			 * if requests are completed to upper layers already,
			 * and then later the already submitted local bio completes.
			 * This can cause DMA into former bio pages that meanwhile
			 * have been re-used for other things.
			 * So aborting local requests may cause crashes,
			 * or even worse, silent data corruption.
			 */
			if (test_and_clear_bit(FORCE_DETACH, &device->flags))
				tl_abort_disk_io(device);

			/* current state still has to be D_FAILED,
			 * there is only one way out: to D_DISKLESS,
			 * and that may only happen after our put_ldev below. */
			if (device->state.disk != D_FAILED)
				drbd_err(device,
					"ASSERT FAILED: disk is %s during detach\n",
					drbd_disk_str(device->state.disk));

			if (ns.conn >= C_CONNECTED)
				drbd_send_state(peer_device, ns);

			drbd_rs_cancel_all(device);

			/* In case we want to get something to stable storage still,
			 * this may be the last chance.
			 * Following put_ldev may transition to D_DISKLESS. */
			drbd_md_sync(device);
		}
		put_ldev(device);
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (device->state.disk != D_DISKLESS)
			drbd_err(device,
				 "ASSERT FAILED: disk is %s while going diskless\n",
				 drbd_disk_str(device->state.disk));

		if (ns.conn >= C_CONNECTED)
			drbd_send_state(peer_device, ns);
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(device);
	}

	/* Notify peer that I had a local IO error, and did not detached.. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
		drbd_send_state(peer_device, ns);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &device->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(device);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(device);

	/* sync target done with resync.  Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(peer_device, ns);

	/* Verify finished, or reached stop sector.  Peer did not know about
	 * the stop sector, and we may even have changed the stop sector during
	 * verify to interrupt/stop early.  Send the new state. */
	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
	&& verify_can_do_stop_sector(device))
		drbd_send_state(peer_device, ns);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or on transition from resync back to AHEAD/BEHIND.
	 *
	 * Connection loss is handled in drbd_disconnected() by the receiver.
	 *
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 *
	 * No harm done if some bits change during this phase.
	 */
	if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
	    (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
		drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(device);
	}

	drbd_md_sync(device);
}