in kvm/book3s_hv.c [3591:3908]
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu;
int i;
int srcu_idx;
struct core_info core_info;
struct kvmppc_vcore *pvc;
struct kvm_split_mode split_info, *sip;
int split, subcore_size, active;
int sub;
bool thr0_done;
unsigned long cmd_bit, stat_bit;
int pcpu, thr;
int target_threads;
int controlled_threads;
int trap;
bool is_power8;
if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
return;
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
*/
prepare_threads(vc);
/* if the runner is no longer runnable, let the caller pick a new one */
if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
/*
* Initialize *vc.
*/
init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
/*
* Number of threads that we will be controlling: the same as
* the number of threads per subcore, except on POWER9,
* where it's 1 because the threads are (mostly) independent.
*/
controlled_threads = threads_per_vcore(vc->kvm);
/*
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu, mftb());
wake_up(&vcpu->arch.cpu_run);
}
goto out;
}
/*
* See if we could run any other vcores on the physical core
* along with this one.
*/
init_core_info(&core_info, vc);
pcpu = smp_processor_id();
target_threads = controlled_threads;
if (target_smt_mode && target_smt_mode < target_threads)
target_threads = target_smt_mode;
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
/*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
* If the mmu_ready flag has been cleared, don't go into the
* guest because that means a HPT resize operation is in progress.
*/
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
recheck_signals_and_mmu(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
for (sub = 1; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
/* Put back on to the preempted vcores list */
kvmppc_vcore_preempt(pvc);
spin_unlock(&pvc->lock);
}
for (i = 0; i < controlled_threads; ++i)
kvmppc_release_hwthread(pcpu + i);
return;
}
kvmppc_clear_host_core(pcpu);
/* Decide on micro-threading (split-core) mode */
subcore_size = threads_per_subcore;
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
memset(&split_info, 0, sizeof(split_info));
for (sub = 0; sub < core_info.n_subcores; ++sub)
split_info.vc[sub] = core_info.vc[sub];
if (is_power8) {
if (split == 2 && (dynamic_mt_modes & 2)) {
cmd_bit = HID0_POWER8_1TO2LPAR;
stat_bit = HID0_POWER8_2LPARMODE;
} else {
split = 4;
cmd_bit = HID0_POWER8_1TO4LPAR;
stat_bit = HID0_POWER8_4LPARMODE;
}
subcore_size = MAX_SMT_THREADS / split;
split_info.rpr = mfspr(SPRN_RPR);
split_info.pmmar = mfspr(SPRN_PMMAR);
split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
} else {
split_info.subcore_size = 1;
}
/* order writes to split_info before kvm_split_mode pointer */
smp_wmb();
}
for (thr = 0; thr < controlled_threads; ++thr) {
struct paca_struct *paca = paca_ptrs[pcpu + thr];
paca->kvm_hstate.napping = 0;
paca->kvm_hstate.kvm_split_mode = sip;
}
/* Initiate micro-threading (split-core) on POWER8 if required */
if (cmd_bit) {
unsigned long hid0 = mfspr(SPRN_HID0);
hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
mb();
mtspr(SPRN_HID0, hid0);
isync();
for (;;) {
hid0 = mfspr(SPRN_HID0);
if (hid0 & stat_bit)
break;
cpu_relax();
}
}
/*
* On POWER8, set RWMR register.
* Since it only affects PURR and SPURR, it doesn't affect
* the host, so we don't save/restore the host value.
*/
if (is_power8) {
unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
int n_online = atomic_read(&vc->online_count);
/*
* Use the 8-thread value if we're doing split-core
* or if the vcore's online count looks bogus.
*/
if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
n_online >= 1 && n_online <= MAX_SMT_THREADS)
rwmr_val = p8_rwmr_values[n_online];
mtspr(SPRN_RWMR, rwmr_val);
}
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
thr = is_power8 ? subcore_thread_map[sub] : sub;
thr0_done = false;
active |= 1 << thr;
pvc = core_info.vc[sub];
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
active |= 1 << (thr + vcpu->arch.ptid);
}
/*
* We need to start the first thread of each subcore
* even if it doesn't have a vcpu.
*/
if (!thr0_done)
kvmppc_start_thread(NULL, pvc);
}
/*
* Ensure that split_info.do_nap is set after setting
* the vcore pointer in the PACA of the secondaries.
*/
smp_mb();
/*
* When doing micro-threading, poke the inactive threads as well.
* This gets them to the nap instruction after kvm_do_nap,
* which reduces the time taken to unsplit later.
*/
if (cmd_bit) {
split_info.do_nap = 1; /* ask secondaries to nap when done */
for (thr = 1; thr < threads_per_subcore; ++thr)
if (!(active & (1 << thr)))
kvmppc_ipi_thread(pcpu + thr);
}
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
trace_kvmppc_run_core(vc, 0);
for (sub = 0; sub < core_info.n_subcores; ++sub)
spin_unlock(&core_info.vc[sub]->lock);
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
this_cpu_disable_ftrace();
/*
* Interrupts will be enabled once we get into the guest,
* so tell lockdep that we're about to enable interrupts.
*/
trace_hardirqs_on();
trap = __kvmppc_vcore_entry();
trace_hardirqs_off();
this_cpu_enable_ftrace();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
set_irq_happened(trap);
spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
/* wait for secondary threads to finish writing their state to memory */
kvmppc_wait_for_nap(controlled_threads);
/* Return to whole-core mode if we split the core earlier */
if (cmd_bit) {
unsigned long hid0 = mfspr(SPRN_HID0);
unsigned long loops = 0;
hid0 &= ~HID0_POWER8_DYNLPARDIS;
stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
mb();
mtspr(SPRN_HID0, hid0);
isync();
for (;;) {
hid0 = mfspr(SPRN_HID0);
if (!(hid0 & stat_bit))
break;
cpu_relax();
++loops;
}
split_info.do_nap = 0;
}
kvmppc_set_host_core(pcpu);
context_tracking_guest_exit();
if (!vtime_accounting_enabled_this_cpu()) {
local_irq_enable();
/*
* Service IRQs here before vtime_account_guest_exit() so any
* ticks that occurred while running the guest are accounted to
* the guest. If vtime accounting is enabled, accounting uses
* TB rather than ticks, so it can be done without enabling
* interrupts here, which has the problem that it accounts
* interrupt processing overhead to the host.
*/
local_irq_disable();
}
vtime_account_guest_exit();
local_irq_enable();
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
}
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
preempt_enable();
for (sub = 0; sub < core_info.n_subcores; ++sub) {
pvc = core_info.vc[sub];
post_guest_process(pvc, pvc == vc);
}
spin_lock(&vc->lock);
out:
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_run_core(vc, 1);
}