long kvmppc_do_h_enter()

in kvm/book3s_hv_rm_mmu.c [184:398]


long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	/*
	 * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
	 * these functions should work together -- must ensure a guest can not
	 * cause problems with the TLBIE that KVM executes.
	 */
	if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
		/* B=0b1x is a reserved value, disallow it. */
		return H_PARAMETER;
	}
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	writing = hpte_is_writable(ptel);
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
	if (ptep) {
		pte_t pte;
		unsigned int host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * We should always find the guest page size
		 * to <= host page size, if host is using hugepage
		 */
		if (host_pte_size < psize) {
			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			pa = pte_pfn(pte) << PAGE_SHIFT;
			pa |= hva & (host_pte_size - 1);
			pa |= gpa & ~PAGE_MASK;
		}
	}
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);

	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/*If we had host pte mapping then  Check WIMG */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		pte_index &= ~7UL;
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it.  Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}