arch/arm/mm/fault.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  linux/arch/arm/mm/fault.c
   4  *
   5  *  Copyright (C) 1995  Linus Torvalds
   6  *  Modifications for ARM processor (c) 1995-2004 Russell King
   7  */
   8 #include <linux/extable.h>
   9 #include <linux/signal.h>
  10 #include <linux/mm.h>
  11 #include <linux/hardirq.h>
  12 #include <linux/init.h>
  13 #include <linux/kprobes.h>
  14 #include <linux/uaccess.h>
  15 #include <linux/page-flags.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/sched/debug.h>
  18 #include <linux/highmem.h>
  19 #include <linux/perf_event.h>
  20
  21 #include <asm/pgtable.h>
  22 #include <asm/system_misc.h>
  23 #include <asm/system_info.h>
  24 #include <asm/tlbflush.h>
  25
  26 #include "fault.h"
  27
  28 #ifdef CONFIG_MMU
  29
  30 /*
  31  * This is useful to dump out the page tables associated with
  32  * 'addr' in mm 'mm'.
  33  */
  34 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
  35 {
  36         pgd_t *pgd;
  37
  38         if (!mm)
  39                 mm = &init_mm;
  40
  41         printk("%spgd = %p\n", lvl, mm->pgd);
  42         pgd = pgd_offset(mm, addr);
  43         printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
  44
  45         do {
  46                 p4d_t *p4d;
  47                 pud_t *pud;
  48                 pmd_t *pmd;
  49                 pte_t *pte;
  50
  51                 p4d = p4d_offset(pgd, addr);
  52                 if (p4d_none(*p4d))
  53                         break;
  54
  55                 if (p4d_bad(*p4d)) {
  56                         pr_cont("(bad)");
  57                         break;
  58                 }
  59
  60                 pud = pud_offset(p4d, addr);
  61                 if (PTRS_PER_PUD != 1)
  62                         pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
  63
  64                 if (pud_none(*pud))
  65                         break;
  66
  67                 if (pud_bad(*pud)) {
  68                         pr_cont("(bad)");
  69                         break;
  70                 }
  71
  72                 pmd = pmd_offset(pud, addr);
  73                 if (PTRS_PER_PMD != 1)
  74                         pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
  75
  76                 if (pmd_none(*pmd))
  77                         break;
  78
  79                 if (pmd_bad(*pmd)) {
  80                         pr_cont("(bad)");
  81                         break;
  82                 }
  83
  84                 /* We must not map this if we have highmem enabled */
  85                 if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
  86                         break;
  87
  88                 pte = pte_offset_map(pmd, addr);
  89                 pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
  90 #ifndef CONFIG_ARM_LPAE
  91                 pr_cont(", *ppte=%08llx",
  92                        (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
  93 #endif
  94                 pte_unmap(pte);
  95         } while(0);
  96
  97         pr_cont("\n");
  98 }
  99 #else                                   /* CONFIG_MMU */
 100 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
 101 { }
 102 #endif                                  /* CONFIG_MMU */
 103
 104 /*
 105  * Oops.  The kernel tried to access some page that wasn't present.
 106  */
 107 static void
 108 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 109                   struct pt_regs *regs)
 110 {
 111         /*
 112          * Are we prepared to handle this kernel fault?
 113          */
 114         if (fixup_exception(regs))
 115                 return;
 116
 117         /*
 118          * No handler, we'll have to terminate things with extreme prejudice.
 119          */
 120         bust_spinlocks(1);
 121         pr_alert("8<--- cut here ---\n");
 122         pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 123                  (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 124                  "paging request", addr);
 125
 126         show_pte(KERN_ALERT, mm, addr);
 127         die("Oops", regs, fsr);
 128         bust_spinlocks(0);
 129         do_exit(SIGKILL);
 130 }
 131
 132 /*
 133  * Something tried to access memory that isn't in our memory map..
 134  * User mode accesses just cause a SIGSEGV
 135  */
 136 static void
 137 __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
 138                 int code, struct pt_regs *regs)
 139 {
 140         struct task_struct *tsk = current;
 141
 142         if (addr > TASK_SIZE)
 143                 harden_branch_predictor();
 144
 145 #ifdef CONFIG_DEBUG_USER
 146         if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
 147             ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
 148                 pr_err("8<--- cut here ---\n");
 149                 pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
 150                        tsk->comm, sig, addr, fsr);
 151                 show_pte(KERN_ERR, tsk->mm, addr);
 152                 show_regs(regs);
 153         }
 154 #endif
 155 #ifndef CONFIG_KUSER_HELPERS
 156         if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
 157                 printk_ratelimited(KERN_DEBUG
 158                                    "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
 159                                    tsk->comm, addr);
 160 #endif
 161
 162         tsk->thread.address = addr;
 163         tsk->thread.error_code = fsr;
 164         tsk->thread.trap_no = 14;
 165         force_sig_fault(sig, code, (void __user *)addr);
 166 }
 167
 168 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 169 {
 170         struct task_struct *tsk = current;
 171         struct mm_struct *mm = tsk->active_mm;
 172
 173         /*
 174          * If we are in kernel mode at this point, we
 175          * have no context to handle this fault with.
 176          */
 177         if (user_mode(regs))
 178                 __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
 179         else
 180                 __do_kernel_fault(mm, addr, fsr, regs);
 181 }
 182
 183 #ifdef CONFIG_MMU
/*
 * Result codes produced by __do_page_fault() in addition to whatever
 * handle_mm_fault() returns:
 *   VM_FAULT_BADMAP    - no usable VMA covers the faulting address
 *   VM_FAULT_BADACCESS - a VMA exists but access_error() rejected the access
 * NOTE(review): the values are presumably chosen to stay clear of the
 * generic VM_FAULT_* bits - confirm against linux/mm_types.h.
 */
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000
 186
 187 /*
 188  * Check that the permissions on the VMA allow for the fault which occurred.
 189  * If we encountered a write fault, we must have write permission, otherwise
 190  * we allow any permission.
 191  */
 192 static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 193 {
 194         unsigned int mask = VM_ACCESS_FLAGS;
 195
 196         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 197                 mask = VM_WRITE;
 198         if (fsr & FSR_LNX_PF)
 199                 mask = VM_EXEC;
 200
 201         return vma->vm_flags & mask ? false : true;
 202 }
 203
 204 static vm_fault_t __kprobes
 205 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 206                 unsigned int flags, struct task_struct *tsk)
 207 {
 208         struct vm_area_struct *vma;
 209         vm_fault_t fault;
 210
 211         vma = find_vma(mm, addr);
 212         fault = VM_FAULT_BADMAP;
 213         if (unlikely(!vma))
 214                 goto out;
 215         if (unlikely(vma->vm_start > addr))
 216                 goto check_stack;
 217
 218         /*
 219          * Ok, we have a good vm_area for this
 220          * memory access, so we can handle it.
 221          */
 222 good_area:
 223         if (access_error(fsr, vma)) {
 224                 fault = VM_FAULT_BADACCESS;
 225                 goto out;
 226         }
 227
 228         return handle_mm_fault(vma, addr & PAGE_MASK, flags);
 229
 230 check_stack:
 231         /* Don't allow expansion below FIRST_USER_ADDRESS */
 232         if (vma->vm_flags & VM_GROWSDOWN &&
 233             addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
 234                 goto good_area;
 235 out:
 236         return fault;
 237 }
 238
/*
 * Page fault handler for addresses that may be backed by a VMA.
 *
 * Takes mm->mmap_sem for reading, lets __do_page_fault() resolve the
 * fault, retries when asked to, and finally turns a failure into either a
 * signal for user mode (__do_user_fault) or the kernel fault path
 * (__do_kernel_fault).
 *
 * @addr: faulting address
 * @fsr:  fault status value describing the access
 * @regs: register state at the time of the fault
 *
 * Returns 0 on every path.
 */
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        int sig, code;
        vm_fault_t fault;
        unsigned int flags = FAULT_FLAG_DEFAULT;

        /* Presumably a kprobe dealt with the fault; nothing more to do. */
        if (kprobe_page_fault(regs, fsr))
                return 0;

        tsk = current;
        mm  = tsk->mm;

        /* Enable interrupts if they were enabled in the parent context. */
        if (interrupts_enabled(regs))
                local_irq_enable();

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (faulthandler_disabled() || !mm)
                goto no_context;

        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
        /*
         * Same write test as access_error() uses.
         * NOTE(review): FSR_CM presumably marks cache maintenance
         * operations - confirm against fault.h.
         */
        if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
                flags |= FAULT_FLAG_WRITE;

        /*
         * As per x86, we may deadlock here.  However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                        goto no_context;
                /*
                 * The retry path further down jumps to this label to take
                 * mmap_sem again before calling __do_page_fault() once more.
                 */
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in
                 * which case, we'll have missed the might_sleep() from
                 * down_read()
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) &&
                    !search_exception_tables(regs->ARM_pc))
                        goto no_context;
#endif
        }

        fault = __do_page_fault(mm, addr, fsr, flags, tsk);

        /* If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_sem because
         * it would already be released in __lock_page_or_retry in
         * mm/filemap.c. */
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
                        goto no_context;
                return 0;
        }

        /*
         * Major/minor page fault accounting is only done on the
         * initial attempt. If we go through a retry, it is extremely
         * likely that the page will be found in page cache at that point.
         */

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                        regs, addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                        regs, addr);
                }
                if (fault & VM_FAULT_RETRY) {
                        /* Mark this as a repeated attempt and go round again. */
                        flags |= FAULT_FLAG_TRIED;
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);

        /*
         * Handle the "normal" case first - VM_FAULT_MAJOR
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed)
                 */
                pagefault_out_of_memory();
                return 0;
        }

        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
                 * successfully fix up this page fault.
                 */
                sig = SIGBUS;
                code = BUS_ADRERR;
        } else {
                /*
                 * Something tried to access memory that
                 * isn't in our memory map..
                 */
                sig = SIGSEGV;
                /* Only an exact VM_FAULT_BADACCESS result yields SEGV_ACCERR. */
                code = fault == VM_FAULT_BADACCESS ?
                        SEGV_ACCERR : SEGV_MAPERR;
        }

        __do_user_fault(addr, fsr, sig, code, regs);
        return 0;

no_context:
        __do_kernel_fault(mm, addr, fsr, regs);
        return 0;
}
 378 #else                                   /* CONFIG_MMU */
 379 static int
 380 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 381 {
 382         return 0;
 383 }
 384 #endif                                  /* CONFIG_MMU */
 385
 386 /*
 387  * First Level Translation Fault Handler
 388  *
 389  * We enter here because the first level page table doesn't contain
 390  * a valid entry for the address.
 391  *
 392  * If the address is in kernel space (>= TASK_SIZE), then we are
 393  * probably faulting in the vmalloc() area.
 394  *
 395  * If the init_task's first level page tables contains the relevant
 396  * entry, we copy the it to this task.  If not, we send the process
 397  * a signal, fixup the exception, or oops the kernel.
 398  *
 399  * NOTE! We MUST NOT take any locks for this case. We may be in an
 400  * interrupt or a critical region, and should only copy the information
 401  * from the master page table, nothing more.
 402  */
 403 #ifdef CONFIG_MMU
 404 static int __kprobes
 405 do_translation_fault(unsigned long addr, unsigned int fsr,
 406                      struct pt_regs *regs)
 407 {
 408         unsigned int index;
 409         pgd_t *pgd, *pgd_k;
 410         p4d_t *p4d, *p4d_k;
 411         pud_t *pud, *pud_k;
 412         pmd_t *pmd, *pmd_k;
 413
 414         if (addr < TASK_SIZE)
 415                 return do_page_fault(addr, fsr, regs);
 416
 417         if (user_mode(regs))
 418                 goto bad_area;
 419
 420         index = pgd_index(addr);
 421
 422         pgd = cpu_get_pgd() + index;
 423         pgd_k = init_mm.pgd + index;
 424
 425         p4d = p4d_offset(pgd, addr);
 426         p4d_k = p4d_offset(pgd_k, addr);
 427
 428         if (p4d_none(*p4d_k))
 429                 goto bad_area;
 430         if (!p4d_present(*p4d))
 431                 set_p4d(p4d, *p4d_k);
 432
 433         pud = pud_offset(p4d, addr);
 434         pud_k = pud_offset(p4d_k, addr);
 435
 436         if (pud_none(*pud_k))
 437                 goto bad_area;
 438         if (!pud_present(*pud))
 439                 set_pud(pud, *pud_k);
 440
 441         pmd = pmd_offset(pud, addr);
 442         pmd_k = pmd_offset(pud_k, addr);
 443
 444 #ifdef CONFIG_ARM_LPAE
 445         /*
 446          * Only one hardware entry per PMD with LPAE.
 447          */
 448         index = 0;
 449 #else
 450         /*
 451          * On ARM one Linux PGD entry contains two hardware entries (see page
 452          * tables layout in pgtable.h). We normally guarantee that we always
 453          * fill both L1 entries. But create_mapping() doesn't follow the rule.
 454          * It can create inidividual L1 entries, so here we have to call
 455          * pmd_none() check for the entry really corresponded to address, not
 456          * for the first of pair.
 457          */
 458         index = (addr >> SECTION_SHIFT) & 1;
 459 #endif
 460         if (pmd_none(pmd_k[index]))
 461                 goto bad_area;
 462
 463         copy_pmd(pmd, pmd_k);
 464         return 0;
 465
 466 bad_area:
 467         do_bad_area(addr, fsr, regs);
 468         return 0;
 469 }
 470 #else                                   /* CONFIG_MMU */
 471 static int
 472 do_translation_fault(unsigned long addr, unsigned int fsr,
 473                      struct pt_regs *regs)
 474 {
 475         return 0;
 476 }
 477 #endif                                  /* CONFIG_MMU */
 478
 479 /*
 480  * Some section permission faults need to be handled gracefully.
 481  * They can happen due to a __{get,put}_user during an oops.
 482  */
 483 #ifndef CONFIG_ARM_LPAE
 484 static int
 485 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 486 {
 487         do_bad_area(addr, fsr, regs);
 488         return 0;
 489 }
 490 #endif /* CONFIG_ARM_LPAE */
 491
 492 /*
 493  * This abort handler always returns "fault".
 494  */
 495 static int
 496 do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 497 {
 498         return 1;
 499 }
 500
 501 struct fsr_info {
 502         int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
 503         int     sig;
 504         int     code;
 505         const char *name;
 506 };
 507
 508 /* FSR definition */
 509 #ifdef CONFIG_ARM_LPAE
 510 #include "fsr-3level.c"
 511 #else
 512 #include "fsr-2level.c"
 513 #endif
 514
 515 void __init
 516 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 517                 int sig, int code, const char *name)
 518 {
 519         if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
 520                 BUG();
 521
 522         fsr_info[nr].fn   = fn;
 523         fsr_info[nr].sig  = sig;
 524         fsr_info[nr].code = code;
 525         fsr_info[nr].name = name;
 526 }
 527
 528 /*
 529  * Dispatch a data abort to the relevant handler.
 530  */
 531 asmlinkage void
 532 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 533 {
 534         const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 535
 536         if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 537                 return;
 538
 539         pr_alert("8<--- cut here ---\n");
 540         pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 541                 inf->name, fsr, addr);
 542         show_pte(KERN_ALERT, current->mm, addr);
 543
 544         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 545                        fsr, 0);
 546 }
 547
 548 void __init
 549 hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 550                  int sig, int code, const char *name)
 551 {
 552         if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
 553                 BUG();
 554
 555         ifsr_info[nr].fn   = fn;
 556         ifsr_info[nr].sig  = sig;
 557         ifsr_info[nr].code = code;
 558         ifsr_info[nr].name = name;
 559 }
 560
 561 asmlinkage void
 562 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 563 {
 564         const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
 565
 566         if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 567                 return;
 568
 569         pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 570                 inf->name, ifsr, addr);
 571
 572         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 573                        ifsr, 0);
 574 }
 575
 576 /*
 577  * Abort handler to be used only during first unmasking of asynchronous aborts
 578  * on the boot CPU. This makes sure that the machine will not die if the
 579  * firmware/bootloader left an imprecise abort pending for us to trip over.
 580  */
 581 static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
 582                                       struct pt_regs *regs)
 583 {
 584         pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
 585                 "first unmask, this is most likely caused by a "
 586                 "firmware/bootloader bug.\n", fsr);
 587
 588         return 0;
 589 }
 590
 591 void __init early_abt_enable(void)
 592 {
 593         fsr_info[FSR_FS_AEA].fn = early_abort_handler;
 594         local_abt_enable();
 595         fsr_info[FSR_FS_AEA].fn = do_bad;
 596 }
 597
 598 #ifndef CONFIG_ARM_LPAE
 599 static int __init exceptions_init(void)
 600 {
 601         if (cpu_architecture() >= CPU_ARCH_ARMv6) {
 602                 hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
 603                                 "I-cache maintenance fault");
 604         }
 605
 606         if (cpu_architecture() >= CPU_ARCH_ARMv7) {
 607                 /*
 608                  * TODO: Access flag faults introduced in ARMv6K.
 609                  * Runtime check for 'K' extension is needed
 610                  */
 611                 hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
 612                                 "section access flag fault");
 613                 hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
 614                                 "section access flag fault");
 615         }
 616
 617         return 0;
 618 }
 619
 620 arch_initcall(exceptions_init);
 621 #endif