/*
 * kvmppc_handle_exit_hv() — excerpt from arch/powerpc/kvm/book3s_hv.c
 * (lines 1494-1815 of the original file).
 */

/*
 * kvmppc_handle_exit_hv - handle an exit from an HV (hypervisor-mode) guest
 * @vcpu: the vcpu that just exited the guest
 * @tsk:  the host task running this vcpu (not referenced in this body)
 *
 * Dispatches on vcpu->arch.trap.  For each trap type we either resolve it
 * here (possibly queueing an interrupt back into the guest) or fill in
 * vcpu->run so userspace can handle it.
 *
 * Return: RESUME_GUEST to re-enter the guest, RESUME_HOST to exit to
 * userspace, RESUME_PAGE_FAULT to let the caller run the page-fault path,
 * or RESUME_PASSTHROUGH for passed-through real-mode interrupt handling.
 */
static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
				 struct task_struct *tsk)
{
	struct kvm_run *run = vcpu->run;
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */
	if (vcpu->arch.shregs.msr & MSR_HV) {
		printk(KERN_EMERG "KVM trap in HV mode!\n");
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		kvmppc_dump_regs(vcpu);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		return RESUME_HOST;
	}
	/* Default exit reason; the cases below override it as needed. */
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
		WARN_ON_ONCE(1); /* Should never happen */
		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
		fallthrough;
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_H_DOORBELL:
	case BOOK3S_INTERRUPT_H_VIRT:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
	case BOOK3S_INTERRUPT_HMI:
	case BOOK3S_INTERRUPT_PERFMON:
	case BOOK3S_INTERRUPT_SYSTEM_RESET:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK: {
		static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);
		/*
		 * Print the MCE event to host console. Ratelimit so the guest
		 * can't flood the host log.
		 */
		if (__ratelimit(&rs))
			machine_check_print_event_info(&vcpu->arch.mce_evt,false, true);

		/*
		 * If the guest can do FWNMI, exit to userspace so it can
		 * deliver a FWNMI to the guest.
		 * Otherwise we synthesize a machine check for the guest
		 * so that it knows that the machine check occurred.
		 */
		if (!vcpu->kvm->arch.fwnmi_enabled) {
			/*
			 * NOTE(review): 0x083c0000 presumably selects the
			 * SRR1 machine-check status bits to carry into the
			 * synthesized interrupt - confirm against the ISA.
			 */
			ulong flags = vcpu->arch.shregs.msr & 0x083c0000;
			kvmppc_core_queue_machine_check(vcpu, flags);
			r = RESUME_GUEST;
			break;
		}

		/* Exit to guest with KVM_EXIT_NMI as exit reason */
		run->exit_reason = KVM_EXIT_NMI;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		/* Clear out the old NMI status from run->flags */
		run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
		/* Now set the NMI status */
		if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
			run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
		else
			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;

		r = RESUME_HOST;
		break;
	}
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		/* 0x1f0000 keeps the SRR1 program-interrupt reason bits */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		int i;

		if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
			/*
			 * Guest userspace executed sc 1. This can only be
			 * reached by the P9 path because the old path
			 * handles this case in realmode hcall handlers.
			 */
			if (!kvmhv_vcpu_is_radix(vcpu)) {
				/*
				 * A guest could be running PR KVM, so this
				 * may be a PR KVM hcall. It must be reflected
				 * to the guest kernel as a sc interrupt.
				 */
				kvmppc_core_queue_syscall(vcpu);
			} else {
				/*
				 * Radix guests can not run PR KVM or nested HV
				 * hash guests which might run PR KVM, so this
				 * is always a privilege fault. Send a program
				 * check to guest kernel.
				 */
				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
			}
			r = RESUME_GUEST;
			break;
		}

		/*
		 * hcall - gather args and set exit_reason. This will next be
		 * handled by kvmppc_pseries_do_hcall which may be able to deal
		 * with it and resume guest, or may punt to userspace.
		 */
		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		/* hcall number is in GPR3; the 9 arguments in GPR4..GPR12 */
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresonding
	 * host page has been paged out.
	 *
	 * Any other HDSI/HISI interrupts have been handled already for P7/8
	 * guests. For POWER9 hash guests not using rmhandlers, basic hash
	 * fault handling is done here.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
		unsigned long vsid;
		long err;

		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
		    unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
			r = RESUME_GUEST; /* Just retry if it's the canary */
			break;
		}

		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
			/*
			 * Radix doesn't require anything, and pre-ISAv3.0 hash
			 * already attempted to handle this in rmhandlers. The
			 * hash fault handling below is v3 only (it uses ASDR
			 * via fault_gpa).
			 */
			r = RESUME_PAGE_FAULT;
			break;
		}

		/*
		 * Faults without NOHPTE/PROTFAULT set can be reflected to
		 * the guest as a data storage interrupt straight away.
		 */
		if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
			kvmppc_core_queue_data_storage(vcpu,
				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
			r = RESUME_GUEST;
			break;
		}

		/*
		 * Real-mode access: use the VRMA segment value; otherwise
		 * use the segment info the hardware saved (ASDR, stored in
		 * fault_gpa by the exit path).
		 */
		if (!(vcpu->arch.shregs.msr & MSR_DR))
			vsid = vcpu->kvm->arch.vrma_slb_v;
		else
			vsid = vcpu->arch.fault_gpa;

		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
				vsid, vcpu->arch.fault_dsisr, true);
		if (err == 0) {
			r = RESUME_GUEST;
		} else if (err == -1 || err == -2) {
			/* -1/-2: defer to the full page-fault path */
			r = RESUME_PAGE_FAULT;
		} else {
			/* positive err is DSISR bits to reflect to the guest */
			kvmppc_core_queue_data_storage(vcpu,
				vcpu->arch.fault_dar, err);
			r = RESUME_GUEST;
		}
		break;
	}
	case BOOK3S_INTERRUPT_H_INST_STORAGE: {
		unsigned long vsid;
		long err;

		/*
		 * For instruction faults the faulting address is the PC and
		 * the "DSISR" is synthesized from the SRR1-style bits in MSR.
		 */
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
			DSISR_SRR1_MATCH_64S;
		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
			/*
			 * Radix doesn't require anything, and pre-ISAv3.0 hash
			 * already attempted to handle this in rmhandlers. The
			 * hash fault handling below is v3 only (it uses ASDR
			 * via fault_gpa).
			 */
			if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
				vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
			r = RESUME_PAGE_FAULT;
			break;
		}

		/*
		 * Without NOPT set, reflect the fault to the guest as an
		 * instruction storage interrupt directly.
		 */
		if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
			kvmppc_core_queue_inst_storage(vcpu,
				vcpu->arch.fault_dsisr);
			r = RESUME_GUEST;
			break;
		}

		/* Same segment selection as the data-storage case above */
		if (!(vcpu->arch.shregs.msr & MSR_IR))
			vsid = vcpu->kvm->arch.vrma_slb_v;
		else
			vsid = vcpu->arch.fault_gpa;

		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
				vsid, vcpu->arch.fault_dsisr, false);
		if (err == 0) {
			r = RESUME_GUEST;
		} else if (err == -1) {
			r = RESUME_PAGE_FAULT;
		} else {
			/* positive err is SRR1 bits to reflect to the guest */
			kvmppc_core_queue_inst_storage(vcpu, err);
			r = RESUME_GUEST;
		}
		break;
	}

	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If the guest debug is disabled, generate a program interrupt
	 * to the guest. If guest debug is enabled, we need to check
	 * whether the instruction is a software breakpoint instruction.
	 * Accordingly return to Guest or Host.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
				swab32(vcpu->arch.emul_inst) :
				vcpu->arch.emul_inst;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
			r = kvmppc_emulate_debug_inst(vcpu);
		} else {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;

#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case BOOK3S_INTERRUPT_HV_SOFTPATCH:
		/*
		 * This occurs for various TM-related instructions that
		 * we need to emulate on POWER9 DD2.2.  We have already
		 * handled the cases where the guest was in real-suspend
		 * mode and was transitioning to transactional state.
		 */
		r = kvmhv_p9_tm_emulation(vcpu);
		if (r != -1)
			break;
		fallthrough; /* go to facility unavailable handler */
#endif

	/*
	 * This occurs if the guest (kernel or userspace), does something that
	 * is prohibited by HFSCR.
	 * On POWER9, this could be a doorbell instruction that we need
	 * to emulate.
	 * Otherwise, we just generate a program interrupt to the guest.
	 */
	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
		/* Top byte of HFSCR identifies which facility caused this */
		u64 cause = vcpu->arch.hfscr >> 56;

		r = EMULATE_FAIL;
		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
			if (cause == FSCR_MSGP_LG)
				r = kvmppc_emulate_doorbell_instr(vcpu);
			if (cause == FSCR_PM_LG)
				r = kvmppc_pmu_unavailable(vcpu);
			if (cause == FSCR_EBB_LG)
				r = kvmppc_ebb_unavailable(vcpu);
			if (cause == FSCR_TM_LG)
				r = kvmppc_tm_unavailable(vcpu);
		}
		if (r == EMULATE_FAIL) {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	}

	case BOOK3S_INTERRUPT_HV_RM_HARD:
		r = RESUME_PASSTHROUGH;
		break;
	default:
		/* Unrecognized trap: dump state and punt to userspace */
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		r = RESUME_HOST;
		break;
	}

	return r;
}