in kvm/book3s_hv_rm_mmu.c [184:398]
/*
 * kvmppc_do_h_enter - insert a guest-supplied HPTE into the guest's hashed
 * page table.
 *
 * @kvm:         VM whose HPT is being modified.
 * @flags:       hcall flags; only H_EXACT is examined here.  Without H_EXACT
 *               the low three bits of @pte_index are ignored and the first
 *               free slot in that group of 8 is used.
 * @pte_index:   index of the HPTE slot (H_EXACT) or of the group to search.
 * @pteh:        first HPTE doubleword as supplied by the guest.
 * @ptel:        second HPTE doubleword as supplied by the guest.
 * @pgdir:       not referenced by this function.
 * @realmode:    when true, the revmap entry and rmap pointers are translated
 *               with real_vmalloc_addr() before being dereferenced.
 * @pte_idx_ret: on H_SUCCESS, receives the index of the slot actually used.
 *
 * The guest real address in @ptel is looked up in the memslots.  With no
 * valid memslot the entry is written as ABSENT with both key bits set.
 * Otherwise the host Linux PTE is consulted under kvm->mmu_lock: a usable
 * PTE produces a VALID HPTE pointing at the host page, anything else
 * produces an ABSENT HPTE.  A VALID entry is linked into the rmap chain
 * unless mmu_notifier_retry() reports an invalidation since @mmu_seq was
 * sampled, in which case it is downgraded to ABSENT.
 *
 * Return:
 *   H_FUNCTION  - the VM uses radix translation.
 *   H_PARAMETER - reserved segment size, unrecognised page size, page not
 *                 aligned within the memslot, guest page larger than the
 *                 backing host page, cache attributes not acceptable for a
 *                 cache-inhibited host page, or @pte_index out of range.
 *   H_PTEG_FULL - no free slot (or, with H_EXACT, the slot is in use).
 *   H_SUCCESS   - HPTE written; *@pte_idx_ret is set.
 */
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
		       long pte_index, unsigned long pteh, unsigned long ptel,
		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
	unsigned long i, pa, gpa, gfn, psize;
	unsigned long slot_fn, hva;
	__be64 *hpte;
	struct revmap_entry *rev;
	unsigned long g_ptel;
	struct kvm_memory_slot *memslot;
	unsigned hpage_shift;
	bool is_ci;
	unsigned long *rmap;
	pte_t *ptep;
	unsigned int writing;
	unsigned long mmu_seq;
	unsigned long rcbits;

	if (kvm_is_radix(kvm))
		return H_FUNCTION;
	/*
	 * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
	 * these functions should work together -- must ensure a guest can not
	 * cause problems with the TLBIE that KVM executes.
	 */
	if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
		/* B=0b1x is a reserved value, disallow it. */
		return H_PARAMETER;
	}
	psize = kvmppc_actual_pgsz(pteh, ptel);
	if (!psize)
		return H_PARAMETER;
	/* Record write intent before ptel is masked/modified below */
	writing = hpte_is_writable(ptel);
	/* The guest does not get to choose the lock/absent/valid bits */
	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
	ptel &= ~HPTE_GR_RESERVED;
	/* Guest's view of the second dword, saved in the revmap entry later */
	g_ptel = ptel;

	/* used later to detect if we might have been invalidated */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* Find the memslot (if any) for this address */
	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
	gfn = gpa >> PAGE_SHIFT;
	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
	pa = 0;
	is_ci = false;
	rmap = NULL;
	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
		/* Emulated MMIO - mark this with key=31 */
		pteh |= HPTE_V_ABSENT;
		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
		goto do_insert;
	}

	/* Check if the requested page fits entirely in the memslot. */
	if (!slot_is_aligned(memslot, psize))
		return H_PARAMETER;
	slot_fn = gfn - memslot->base_gfn;
	rmap = &memslot->arch.rmap[slot_fn];

	/* Translate to host virtual address */
	hva = __gfn_to_hva_memslot(memslot, gfn);

	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
	if (ptep) {
		pte_t pte;
		/*
		 * Must be as wide as psize/hva: "1ul << hpage_shift" does not
		 * fit in an unsigned int once hpage_shift reaches 32, which
		 * would truncate the size to 0 and reject the request below.
		 */
		unsigned long host_pte_size;

		if (hpage_shift)
			host_pte_size = 1ul << hpage_shift;
		else
			host_pte_size = PAGE_SIZE;
		/*
		 * We should always find the guest page size
		 * to <= host page size, if host is using hugepage
		 */
		if (host_pte_size < psize) {
			arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
			return H_PARAMETER;
		}
		pte = kvmppc_read_update_linux_pte(ptep, writing);
		if (pte_present(pte) && !pte_protnone(pte)) {
			if (writing && !__pte_write(pte))
				/* make the actual HPTE be read-only */
				ptel = hpte_make_readonly(ptel);
			is_ci = pte_ci(pte);
			/* Host page frame address ... */
			pa = pte_pfn(pte) << PAGE_SHIFT;
			/* ... plus the offset of hva within the host page ... */
			pa |= hva & (host_pte_size - 1);
			/* ... plus the bits of gpa below PAGE_SIZE */
			pa |= gpa & ~PAGE_MASK;
		}
	}
	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);

	/*
	 * Keep only the key, PP0 and the bits below the page size from the
	 * guest's second dword, then substitute the host real address.
	 */
	ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
	ptel |= pa;

	if (pa)
		pteh |= HPTE_V_VALID;
	else {
		/* No usable host mapping: write an absent entry, key bits clear */
		pteh |= HPTE_V_ABSENT;
		ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
	}

	/* If we had a host PTE mapping, then check the WIMG bits */
	if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
		if (is_ci)
			return H_PARAMETER;
		/*
		 * Allow guest to map emulated device memory as
		 * uncacheable, but actually make it cacheable.
		 */
		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
		ptel |= HPTE_R_M;
	}

	/* Find and lock the HPTEG slot to use */
 do_insert:
	if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
		return H_PARAMETER;
	if (likely((flags & H_EXACT) == 0)) {
		/* Search the group of 8 slots containing pte_index */
		pte_index &= ~7UL;
		/* Each HPTE is 16 bytes, i.e. two __be64 words */
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		for (i = 0; i < 8; ++i) {
			if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
			    try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
					  HPTE_V_ABSENT))
				break;
			hpte += 2;
		}
		if (i == 8) {
			/*
			 * Since try_lock_hpte doesn't retry (not even stdcx.
			 * failures), it could be that there is a free slot
			 * but we transiently failed to lock it.  Try again,
			 * actually locking each slot and checking it.
			 */
			hpte -= 16;	/* rewind to the first slot of the group */
			for (i = 0; i < 8; ++i) {
				u64 pte;
				while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
					cpu_relax();
				pte = be64_to_cpu(hpte[0]);
				if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
					break;
				/* In use: release it with its contents unchanged */
				__unlock_hpte(hpte, pte);
				hpte += 2;
			}
			if (i == 8)
				return H_PTEG_FULL;
		}
		pte_index += i;
	} else {
		hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
		if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
				   HPTE_V_ABSENT)) {
			/* Lock the slot and check again */
			u64 pte;

			while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
				cpu_relax();
			pte = be64_to_cpu(hpte[0]);
			if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
				__unlock_hpte(hpte, pte);
				return H_PTEG_FULL;
			}
		}
	}

	/* Save away the guest's idea of the second HPTE dword */
	rev = &kvm->arch.hpt.rev[pte_index];
	if (realmode)
		rev = real_vmalloc_addr(rev);
	if (rev) {
		rev->guest_rpte = g_ptel;
		note_hpte_modification(kvm, rev);
	}

	/* Link HPTE into reverse-map chain */
	if (pteh & HPTE_V_VALID) {
		if (realmode)
			rmap = real_vmalloc_addr(rmap);
		lock_rmap(rmap);
		/* Check for pending invalidations under the rmap chain lock */
		if (mmu_notifier_retry(kvm, mmu_seq)) {
			/* inval in progress, write a non-present HPTE */
			pteh |= HPTE_V_ABSENT;
			pteh &= ~HPTE_V_VALID;
			ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
			unlock_rmap(rmap);
		} else {
			kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
						realmode);
			/* Only set R/C in real HPTE if already set in *rmap */
			rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT;
			ptel &= rcbits | ~(HPTE_R_R | HPTE_R_C);
		}
	}

	/* Convert to new format on P9 */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		ptel = hpte_old_to_new_r(pteh, ptel);
		pteh = hpte_old_to_new_v(pteh);
	}
	/* Second dword goes in first, while the slot is still locked */
	hpte[1] = cpu_to_be64(ptel);

	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
	eieio();
	__unlock_hpte(hpte, pteh);
	asm volatile("ptesync" : : : "memory");

	*pte_idx_ret = pte_index;
	return H_SUCCESS;
}