static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)

in kvm/book3s_hv.c [3591:3908]


static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
	struct kvm_vcpu *vcpu;
	int i;
	int srcu_idx;
	struct core_info core_info;
	struct kvmppc_vcore *pvc;
	struct kvm_split_mode split_info, *sip;
	int split, subcore_size, active;
	int sub;
	bool thr0_done;
	unsigned long cmd_bit, stat_bit;
	int pcpu, thr;
	int target_threads;
	int controlled_threads;
	int trap;
	bool is_power8;

	if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
		return;

	/*
	 * Remove from the list any threads that have a signal pending
	 * or need a VPA update done
	 */
	prepare_threads(vc);

	/* if the runner is no longer runnable, let the caller pick a new one */
	if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
		return;

	/*
	 * Initialize *vc.
	 */
	init_vcore_to_run(vc);
	vc->preempt_tb = TB_NIL;

	/*
	 * Number of threads that we will be controlling: the same as
	 * the number of threads per subcore, except on POWER9,
	 * where it's 1 because the threads are (mostly) independent.
	 */
	controlled_threads = threads_per_vcore(vc->kvm);
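	/*
	 * Since the CPU_FTR_ARCH_300 case returned early above,
	 * threads_per_vcore() here is just threads_per_subcore.
	 */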

	/*
	 * Make sure we are running on primary threads, and that secondary
	 * threads are offline.  Also bail out if this vcore has more
	 * threads than the host supports per subcore.
	 */
	if ((controlled_threads > 1) &&
	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
		for_each_runnable_thread(i, vcpu, vc) {
			vcpu->arch.ret = -EBUSY;
			kvmppc_remove_runnable(vc, vcpu, mftb());
			wake_up(&vcpu->arch.cpu_run);
		}
		goto out;
	}

	/*
	 * See if we could run any other vcores on the physical core
	 * along with this one.
	 */
	init_core_info(&core_info, vc);
	pcpu = smp_processor_id();
	target_threads = controlled_threads;
	if (target_smt_mode && target_smt_mode < target_threads)
		target_threads = target_smt_mode;
	if (vc->num_threads < target_threads)
		collect_piggybacks(&core_info, target_threads);
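	/*
	 * Any vcores we piggyback are taken off the preempted list and
	 * returned locked; we drop those locks just before guest entry
	 * and finish the vcores off in post_guest_process() below.
	 */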

	/*
	 * Hard-disable interrupts, and check resched flag and signals.
	 * If we need to reschedule or deliver a signal, clean up
	 * and return without going into the guest(s).
	 * If the mmu_ready flag has been cleared, don't go into the
	 * guest because that means a HPT resize operation is in progress.
	 */
	local_irq_disable();
	hard_irq_disable();
	if (lazy_irq_pending() || need_resched() ||
	    recheck_signals_and_mmu(&core_info)) {
		local_irq_enable();
		vc->vcore_state = VCORE_INACTIVE;
		/* Unlock all except the primary vcore */
		for (sub = 1; sub < core_info.n_subcores; ++sub) {
			pvc = core_info.vc[sub];
			/* Put back on to the preempted vcores list */
			kvmppc_vcore_preempt(pvc);
			spin_unlock(&pvc->lock);
		}
		for (i = 0; i < controlled_threads; ++i)
			kvmppc_release_hwthread(pcpu + i);
		return;
	}

	kvmppc_clear_host_core(pcpu);

	/* Decide on micro-threading (split-core) mode */
	subcore_size = threads_per_subcore;
	cmd_bit = stat_bit = 0;
	split = core_info.n_subcores;
	sip = NULL;
	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);

	if (split > 1) {
		sip = &split_info;
		memset(&split_info, 0, sizeof(split_info));
		for (sub = 0; sub < core_info.n_subcores; ++sub)
			split_info.vc[sub] = core_info.vc[sub];

		if (is_power8) {
			if (split == 2 && (dynamic_mt_modes & 2)) {
				cmd_bit = HID0_POWER8_1TO2LPAR;
				stat_bit = HID0_POWER8_2LPARMODE;
			} else {
				split = 4;
				cmd_bit = HID0_POWER8_1TO4LPAR;
				stat_bit = HID0_POWER8_4LPARMODE;
			}
			subcore_size = MAX_SMT_THREADS / split;
			split_info.rpr = mfspr(SPRN_RPR);
			split_info.pmmar = mfspr(SPRN_PMMAR);
			split_info.ldbar = mfspr(SPRN_LDBAR);
			split_info.subcore_size = subcore_size;
		} else {
			split_info.subcore_size = 1;
		}

		/* order writes to split_info before kvm_split_mode pointer */
		smp_wmb();
	}

	for (thr = 0; thr < controlled_threads; ++thr) {
		struct paca_struct *paca = paca_ptrs[pcpu + thr];

		paca->kvm_hstate.napping = 0;
		paca->kvm_hstate.kvm_split_mode = sip;
	}
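	/*
	 * Each hardware thread finds the split-mode description through
	 * its PACA; the smp_wmb() above ensures the split_info contents
	 * are visible before the pointer is.
	 */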

	/* Initiate micro-threading (split-core) on POWER8 if required */
	if (cmd_bit) {
		unsigned long hid0 = mfspr(SPRN_HID0);

		hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
		mb();
		mtspr(SPRN_HID0, hid0);
		isync();
		for (;;) {
			hid0 = mfspr(SPRN_HID0);
			if (hid0 & stat_bit)
				break;
			cpu_relax();
		}
	}
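	/*
	 * The mode change requested through HID0 takes effect
	 * asynchronously; hardware signals completion by setting the
	 * corresponding LPARMODE status bit, hence the spin above.
	 */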

	/*
	 * On POWER8, set the RWMR register.
	 * Since it only affects PURR and SPURR, it doesn't affect
	 * the host, so we don't save/restore the host value.
	 */
	if (is_power8) {
		unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
		int n_online = atomic_read(&vc->online_count);

		/*
		 * Keep the 8-thread default if we're doing split-core
		 * or if the vcore's online count looks bogus.
		 */
		if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
		    n_online >= 1 && n_online <= MAX_SMT_THREADS)
			rwmr_val = p8_rwmr_values[n_online];
		mtspr(SPRN_RWMR, rwmr_val);
	}

	/* Start all the threads */
	active = 0;
	for (sub = 0; sub < core_info.n_subcores; ++sub) {
		thr = is_power8 ? subcore_thread_map[sub] : sub;
		thr0_done = false;
		active |= 1 << thr;
		pvc = core_info.vc[sub];
		pvc->pcpu = pcpu + thr;
		for_each_runnable_thread(i, vcpu, pvc) {
			kvmppc_start_thread(vcpu, pvc);
			kvmppc_create_dtl_entry(vcpu, pvc);
			trace_kvm_guest_enter(vcpu);
			if (!vcpu->arch.ptid)
				thr0_done = true;
			active |= 1 << (thr + vcpu->arch.ptid);
		}
		/*
		 * We need to start the first thread of each subcore
		 * even if it doesn't have a vcpu.
		 */
		if (!thr0_done)
			kvmppc_start_thread(NULL, pvc);
	}
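	/*
	 * Sending an idle thread 0 into the guest matters because the
	 * real-mode entry code relies on thread 0 of each subcore to
	 * perform the subcore-wide partition switch (LPIDR etc.) for
	 * the vcpus running on the other threads.
	 */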

	/*
	 * Ensure that split_info.do_nap is set after setting
	 * the vcore pointer in the PACA of the secondaries.
	 */
	smp_mb();

	/*
	 * When doing micro-threading, poke the inactive threads as well.
	 * This gets them to the nap instruction after kvm_do_nap,
	 * which reduces the time taken to unsplit later.
	 */
	if (cmd_bit) {
		split_info.do_nap = 1;	/* ask secondaries to nap when done */
		for (thr = 1; thr < threads_per_subcore; ++thr)
			if (!(active & (1 << thr)))
				kvmppc_ipi_thread(pcpu + thr);
	}

	vc->vcore_state = VCORE_RUNNING;
	preempt_disable();

	trace_kvmppc_run_core(vc, 0);

	for (sub = 0; sub < core_info.n_subcores; ++sub)
		spin_unlock(&core_info.vc[sub]->lock);

	guest_enter_irqoff();

	srcu_idx = srcu_read_lock(&vc->kvm->srcu);

	this_cpu_disable_ftrace();

	/*
	 * Interrupts will be enabled once we get into the guest,
	 * so tell lockdep that we're about to enable interrupts.
	 */
	trace_hardirqs_on();

	trap = __kvmppc_vcore_entry();
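	/*
	 * __kvmppc_vcore_entry() is the low-level assembly entry point:
	 * it switches this thread into the guest and returns the trap
	 * (interrupt vector) that caused the guest exit.
	 */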

	trace_hardirqs_off();

	this_cpu_enable_ftrace();

	srcu_read_unlock(&vc->kvm->srcu, srcu_idx);

	set_irq_happened(trap);
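	/*
	 * We ran with interrupts hard-disabled, so any host interrupt
	 * taken in the guest has not been replayed; set_irq_happened()
	 * records it in irq_happened for replay once interrupts are
	 * enabled below.
	 */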

	spin_lock(&vc->lock);
	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
	vc->vcore_state = VCORE_EXITING;

	/* wait for secondary threads to finish writing their state to memory */
	kvmppc_wait_for_nap(controlled_threads);
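	/*
	 * Each secondary clears its kvm_hstate vcore pointer once its
	 * state is saved, so past this point we can safely inspect the
	 * secondaries' vcpu state from this thread.
	 */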

	/* Return to whole-core mode if we split the core earlier */
	if (cmd_bit) {
		unsigned long hid0 = mfspr(SPRN_HID0);
		unsigned long loops = 0;

		hid0 &= ~HID0_POWER8_DYNLPARDIS;
		stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
		mb();
		mtspr(SPRN_HID0, hid0);
		isync();
		for (;;) {
			hid0 = mfspr(SPRN_HID0);
			if (!(hid0 & stat_bit))
				break;
			cpu_relax();
			++loops;
		}
		split_info.do_nap = 0;
	}
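	/*
	 * Clearing do_nap lets secondaries woken up below return to the
	 * host instead of napping again.
	 */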

	kvmppc_set_host_core(pcpu);

	context_tracking_guest_exit();
	if (!vtime_accounting_enabled_this_cpu()) {
		local_irq_enable();
		/*
		 * Service IRQs here before vtime_account_guest_exit() so any
		 * ticks that occurred while running the guest are accounted to
		 * the guest. If vtime accounting is enabled, accounting uses
		 * TB rather than ticks, so it can be done without enabling
		 * interrupts here, which has the problem that it accounts
		 * interrupt processing overhead to the host.
		 */
		local_irq_disable();
	}
	vtime_account_guest_exit();

	local_irq_enable();

	/* Let secondaries go back to the offline loop */
	for (i = 0; i < controlled_threads; ++i) {
		kvmppc_release_hwthread(pcpu + i);
		if (sip && sip->napped[i])
			kvmppc_ipi_thread(pcpu + i);
	}
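	/*
	 * A thread with sip->napped[i] set went to nap at our request
	 * and sits there until poked, so it needs an IPI to resume the
	 * offline loop.
	 */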

	spin_unlock(&vc->lock);

	/* make sure updates to secondary vcpu structs are visible now */
	smp_mb();

	preempt_enable();

	for (sub = 0; sub < core_info.n_subcores; ++sub) {
		pvc = core_info.vc[sub];
		post_guest_process(pvc, pvc == vc);
	}
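	/*
	 * post_guest_process() handles each vcpu's trap, wakes up vcpu
	 * tasks that have finished, and puts a piggybacked vcore that
	 * still has runnable threads back on the preempted list.
	 */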

	spin_lock(&vc->lock);

 out:
	vc->vcore_state = VCORE_INACTIVE;
	trace_kvmppc_run_core(vc, 1);
}