in arch/arm64/mm/fault.c [518:671]
static int __kprobes do_page_fault(unsigned long far, unsigned int esr,
                                   struct pt_regs *regs)
{
        const struct fault_info *inf;
        struct mm_struct *mm = current->mm;
        vm_fault_t fault;
        unsigned long vm_flags;
        unsigned int mm_flags = FAULT_FLAG_DEFAULT;
        unsigned long addr = untagged_addr(far);
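        /*
         * Note: 'far' may carry a top-byte tag (TBI/MTE) on user
         * addresses. The untagged address is what gets compared against
         * the VMA tree below; the original tagged 'far' is kept around
         * so it can be reported back to userspace in siginfo.
         */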

        if (kprobe_page_fault(regs, esr))
                return 0;

        /*
         * If we're in an interrupt or have no user context, we must not take
         * the fault.
         */
        if (faulthandler_disabled() || !mm)
                goto no_context;
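
        /*
         * Record that the fault originated in user mode; the core mm
         * uses FAULT_FLAG_USER, e.g. to permit memcg OOM handling from
         * the fault path.
         */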
        if (user_mode(regs))
                mm_flags |= FAULT_FLAG_USER;

        /*
         * vm_flags tells us what bits we must have in vma->vm_flags
         * for the fault to be benign; __do_page_fault() checks
         * vma->vm_flags & vm_flags and returns an error if the
         * intersection is empty.
         */
        if (is_el0_instruction_abort(esr)) {
                /* It was exec fault */
                vm_flags = VM_EXEC;
                mm_flags |= FAULT_FLAG_INSTRUCTION;
        } else if (is_write_abort(esr)) {
                /* It was write fault */
                vm_flags = VM_WRITE;
                mm_flags |= FAULT_FLAG_WRITE;
        } else {
                /* It was read fault */
                vm_flags = VM_READ;
                /* Write implies read */
                vm_flags |= VM_WRITE;
                /* If EPAN is absent then exec implies read */
                if (!cpus_have_const_cap(ARM64_HAS_EPAN))
                        vm_flags |= VM_EXEC;
        }
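        /*
         * At this point vm_flags holds every vm_flags bit that would
         * make the access legal. If the faulting VMA has none of them,
         * __do_page_fault() fails the access with VM_FAULT_BADACCESS,
         * which becomes SEGV_ACCERR below. With EPAN (Enhanced
         * Privileged Access Never), execute-only user mappings become
         * possible, so a read fault can no longer be satisfied by a
         * mapping that is only VM_EXEC.
         */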

        if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) {
                if (is_el1_instruction_abort(esr))
                        die_kernel_fault("execution of user memory",
                                         addr, esr, regs);

                if (!search_exception_tables(regs->pc))
                        die_kernel_fault("access to user memory outside uaccess routines",
                                         addr, esr, regs);
        }
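        /*
         * A TTBR0 (user-half) address faulting with a permission fault
         * at EL1 means the kernel itself touched user memory: executing
         * it is never legitimate, and data access is only legitimate
         * from uaccess routines, which always carry an exception-table
         * entry for the faulting instruction. Anything that survives
         * this check is a sanctioned uaccess and is handled normally.
         */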

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);

        /*
         * As per x86, we may deadlock here. However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!mmap_read_trylock(mm)) {
                if (!user_mode(regs) && !search_exception_tables(regs->pc))
                        goto no_context;
retry:
                mmap_read_lock(mm);
        } else {
                /*
                 * The above mmap_read_trylock() might have succeeded in which
                 * case, we'll have missed the might_sleep() from down_read().
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) && !search_exception_tables(regs->pc)) {
                        mmap_read_unlock(mm);
                        goto no_context;
                }
#endif
        }
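        /*
         * Whichever branch was taken, we now hold mm's mmap lock for
         * read; every path below must drop it, either directly or
         * implicitly when the handler returns VM_FAULT_RETRY or aborts
         * on a pending signal.
         */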

        fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);

        /* Quick path to respond to signals */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;
                return 0;
        }
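        /*
         * fault_signal_pending() is true when the handler bailed out
         * (VM_FAULT_RETRY) because a fatal signal arrived; the mmap
         * lock has already been dropped in that case. A user fault just
         * returns so the signal is delivered on exception return, while
         * a kernel fault still needs the exception-table fixup.
         */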

        if (fault & VM_FAULT_RETRY) {
                mm_flags |= FAULT_FLAG_TRIED;
                goto retry;
        }
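        /*
         * VM_FAULT_RETRY means the handler dropped the mmap lock,
         * typically to wait for page I/O, so the goto above re-takes
         * it. FAULT_FLAG_TRIED marks the repeat attempt for the core mm.
         */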
        mmap_read_unlock(mm);

        /*
         * Handle the "normal" (no error) case first.
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
                              VM_FAULT_BADACCESS))))
                return 0;

        /*
         * If we are in kernel mode at this point, we have no context to
         * handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we got
                 * oom-killed).
                 */
                pagefault_out_of_memory();
                return 0;
        }

        inf = esr_to_fault_info(esr);
        set_thread_esr(addr, esr);
        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to successfully fix up
                 * this page fault.
                 */
                arm64_force_sig_fault(SIGBUS, BUS_ADRERR, far, inf->name);
        } else if (fault & (VM_FAULT_HWPOISON_LARGE | VM_FAULT_HWPOISON)) {
                unsigned int lsb;
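
                /*
                 * 'lsb' ends up in siginfo.si_addr_lsb and tells
                 * userspace the granularity of the poisoned region: a
                 * base page by default, or the huge page size when the
                 * poison hit a hugetlb mapping (HWPOISON_LARGE).
                 */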
                lsb = PAGE_SHIFT;
                if (fault & VM_FAULT_HWPOISON_LARGE)
                        lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));

                arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
        } else {
                /*
                 * Something tried to access memory that isn't in our memory
                 * map.
                 */
                arm64_force_sig_fault(SIGSEGV,
                                      fault == VM_FAULT_BADACCESS ? SEGV_ACCERR : SEGV_MAPERR,
                                      far, inf->name);
        }
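        /*
         * VM_FAULT_BADMAP (no VMA covers the address) becomes
         * SEGV_MAPERR; VM_FAULT_BADACCESS (a VMA exists but its
         * vm_flags don't allow the access) becomes SEGV_ACCERR. Note
         * that siginfo carries the original, possibly tagged 'far'.
         */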

        return 0;

no_context:
        __do_kernel_fault(addr, esr, regs);
        return 0;
}
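
For reference, the classifiers and arm64-private fault codes this function leans on are defined earlier in the same file. Below is a minimal sketch, under the assumption that they match the upstream arm64 fault.c of this kernel era (the sketch is not part of the excerpt above); ESR_ELx_WNR is the write-not-read syndrome bit and ESR_ELx_CM flags cache-maintenance operations.

/*
 * Minimal sketch, assuming upstream arm64 definitions of this era;
 * not part of the excerpt above.
 */

/* arm64-private fault codes, kept outside the generic VM_FAULT_* space */
#define VM_FAULT_BADMAP         ((__force vm_fault_t)0x010000)
#define VM_FAULT_BADACCESS      ((__force vm_fault_t)0x020000)

static bool is_write_abort(unsigned int esr)
{
        /*
         * WNR is set for write aborts, but cache maintenance
         * operations also report WNR=1 and must not be treated as
         * writes, hence the CM exclusion.
         */
        return (esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM);
}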