in kvm/book3s_hv_nested.c [1478:1645]
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_memory_slot *memslot;
struct rmap_nested *n_rmap;
struct kvmppc_pte gpte;
pte_t pte, *pte_p;
unsigned long mmu_seq;
unsigned long dsisr = vcpu->arch.fault_dsisr;
unsigned long ea = vcpu->arch.fault_dar;
unsigned long *rmapp;
unsigned long n_gpa, gpa, gfn, perm = 0UL;
unsigned int shift, l1_shift, level;
bool writing = !!(dsisr & DSISR_ISSTORE);
bool kvm_ro = false;
long int ret;
if (!gp->l1_gr_to_hr) {
kvmhv_update_ptbl_cache(gp);
if (!gp->l1_gr_to_hr)
return RESUME_HOST;
}
/* Convert the nested guest real address into a L1 guest real address */
n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
if (!(dsisr & DSISR_PRTABLE_FAULT))
n_gpa |= ea & 0xFFF;
ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
/*
* If the hardware found a translation but we don't now have a usable
* translation in the l1 partition-scoped tree, remove the shadow pte
* and let the guest retry.
*/
if (ret == RESUME_HOST &&
(dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
DSISR_BAD_COPYPASTE)))
goto inval;
if (ret)
return ret;
/* Failed to set the reference/change bits */
if (dsisr & DSISR_SET_RC) {
ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
if (ret == RESUME_HOST)
return ret;
if (ret)
goto inval;
dsisr &= ~DSISR_SET_RC;
if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
DSISR_PROTFAULT)))
return RESUME_GUEST;
}
/*
* We took an HISI or HDSI while we were running a nested guest which
* means we have no partition scoped translation for that. This means
* we need to insert a pte for the mapping into our shadow_pgtable.
*/
l1_shift = gpte.page_shift;
if (l1_shift < PAGE_SHIFT) {
/* We don't support l1 using a page size smaller than our own */
pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
l1_shift, PAGE_SHIFT);
return -EINVAL;
}
gpa = gpte.raddr;
gfn = gpa >> PAGE_SHIFT;
/* 1. Get the corresponding host memslot */
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
/* unusual error -> reflect to the guest as a DSI */
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
return RESUME_GUEST;
}
/* passthrough of emulated MMIO case */
return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
if (writing) {
/* Give the guest a DSI */
kvmppc_core_queue_data_storage(vcpu, ea,
DSISR_ISSTORE | DSISR_PROTFAULT);
return RESUME_GUEST;
}
kvm_ro = true;
}
/* 2. Find the host pte for this L1 guest real address */
/* Used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
/* See if can find translation in our partition scoped tables for L1 */
pte = __pte(0);
spin_lock(&kvm->mmu_lock);
pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
if (!shift)
shift = PAGE_SHIFT;
if (pte_p)
pte = *pte_p;
spin_unlock(&kvm->mmu_lock);
if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
/* No suitable pte found -> try to insert a mapping */
ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
writing, kvm_ro, &pte, &level);
if (ret == -EAGAIN)
return RESUME_GUEST;
else if (ret)
return ret;
shift = kvmppc_radix_level_to_shift(level);
}
/* Align gfn to the start of the page */
gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
/* The permissions is the combination of the host and l1 guest ptes */
perm |= gpte.may_read ? 0UL : _PAGE_READ;
perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
pte = __pte(pte_val(pte) & ~perm);
/* What size pte can we insert? */
if (shift > l1_shift) {
u64 mask;
unsigned int actual_shift = PAGE_SHIFT;
if (PMD_SHIFT < l1_shift)
actual_shift = PMD_SHIFT;
mask = (1UL << shift) - (1UL << actual_shift);
pte = __pte(pte_val(pte) | (gpa & mask));
shift = actual_shift;
}
level = kvmppc_radix_shift_to_level(shift);
n_gpa &= ~((1UL << shift) - 1);
/* 4. Insert the pte into our shadow_pgtable */
n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
if (!n_rmap)
return RESUME_GUEST; /* Let the guest try again */
n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
kfree(n_rmap);
if (ret == -EAGAIN)
ret = RESUME_GUEST; /* Let the guest try again */
return ret;
inval:
kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
return RESUME_GUEST;
}