arch/arm/mm/fault.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  linux/arch/arm/mm/fault.c
   4  *
   5  *  Copyright (C) 1995  Linus Torvalds
   6  *  Modifications for ARM processor (c) 1995-2004 Russell King
   7  */
   8 #include <linux/extable.h>
   9 #include <linux/signal.h>
  10 #include <linux/mm.h>
  11 #include <linux/hardirq.h>
  12 #include <linux/init.h>
  13 #include <linux/kprobes.h>
  14 #include <linux/uaccess.h>
  15 #include <linux/page-flags.h>
  16 #include <linux/sched/signal.h>
  17 #include <linux/sched/debug.h>
  18 #include <linux/highmem.h>
  19 #include <linux/perf_event.h>
  20
  21 #include <asm/system_misc.h>
  22 #include <asm/system_info.h>
  23 #include <asm/tlbflush.h>
  24
  25 #include "fault.h"
  26
  27 #ifdef CONFIG_MMU
  28
  29 /*
  30  * This is useful to dump out the page tables associated with
  31  * 'addr' in mm 'mm'.
  32  */
  33 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
  34 {
  35         pgd_t *pgd;
  36
  37         if (!mm)
  38                 mm = &init_mm;
  39
  40         printk("%spgd = %p\n", lvl, mm->pgd);
  41         pgd = pgd_offset(mm, addr);
  42         printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd));
  43
  44         do {
  45                 p4d_t *p4d;
  46                 pud_t *pud;
  47                 pmd_t *pmd;
  48                 pte_t *pte;
  49
  50                 p4d = p4d_offset(pgd, addr);
  51                 if (p4d_none(*p4d))
  52                         break;
  53
  54                 if (p4d_bad(*p4d)) {
  55                         pr_cont("(bad)");
  56                         break;
  57                 }
  58
  59                 pud = pud_offset(p4d, addr);
  60                 if (PTRS_PER_PUD != 1)
  61                         pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
  62
  63                 if (pud_none(*pud))
  64                         break;
  65
  66                 if (pud_bad(*pud)) {
  67                         pr_cont("(bad)");
  68                         break;
  69                 }
  70
  71                 pmd = pmd_offset(pud, addr);
  72                 if (PTRS_PER_PMD != 1)
  73                         pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
  74
  75                 if (pmd_none(*pmd))
  76                         break;
  77
  78                 if (pmd_bad(*pmd)) {
  79                         pr_cont("(bad)");
  80                         break;
  81                 }
  82
  83                 /* We must not map this if we have highmem enabled */
  84                 if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
  85                         break;
  86
  87                 pte = pte_offset_map(pmd, addr);
  88                 pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
  89 #ifndef CONFIG_ARM_LPAE
  90                 pr_cont(", *ppte=%08llx",
  91                        (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
  92 #endif
  93                 pte_unmap(pte);
  94         } while(0);
  95
  96         pr_cont("\n");
  97 }
  98 #else                                   /* CONFIG_MMU */
  99 void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr)
 100 { }
 101 #endif                                  /* CONFIG_MMU */
 102
 103 /*
 104  * Oops.  The kernel tried to access some page that wasn't present.
 105  */
 106 static void
 107 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 108                   struct pt_regs *regs)
 109 {
 110         /*
 111          * Are we prepared to handle this kernel fault?
 112          */
 113         if (fixup_exception(regs))
 114                 return;
 115
 116         /*
 117          * No handler, we'll have to terminate things with extreme prejudice.
 118          */
 119         bust_spinlocks(1);
 120         pr_alert("8<--- cut here ---\n");
 121         pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
 122                  (addr < PAGE_SIZE) ? "NULL pointer dereference" :
 123                  "paging request", addr);
 124
 125         show_pte(KERN_ALERT, mm, addr);
 126         die("Oops", regs, fsr);
 127         bust_spinlocks(0);
 128         do_exit(SIGKILL);
 129 }
 130
 131 /*
 132  * Something tried to access memory that isn't in our memory map..
 133  * User mode accesses just cause a SIGSEGV
 134  */
 135 static void
 136 __do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig,
 137                 int code, struct pt_regs *regs)
 138 {
 139         struct task_struct *tsk = current;
 140
 141         if (addr > TASK_SIZE)
 142                 harden_branch_predictor();
 143
 144 #ifdef CONFIG_DEBUG_USER
 145         if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
 146             ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
 147                 pr_err("8<--- cut here ---\n");
 148                 pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
 149                        tsk->comm, sig, addr, fsr);
 150                 show_pte(KERN_ERR, tsk->mm, addr);
 151                 show_regs(regs);
 152         }
 153 #endif
 154 #ifndef CONFIG_KUSER_HELPERS
 155         if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000))
 156                 printk_ratelimited(KERN_DEBUG
 157                                    "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n",
 158                                    tsk->comm, addr);
 159 #endif
 160
 161         tsk->thread.address = addr;
 162         tsk->thread.error_code = fsr;
 163         tsk->thread.trap_no = 14;
 164         force_sig_fault(sig, code, (void __user *)addr);
 165 }
 166
 167 void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 168 {
 169         struct task_struct *tsk = current;
 170         struct mm_struct *mm = tsk->active_mm;
 171
 172         /*
 173          * If we are in kernel mode at this point, we
 174          * have no context to handle this fault with.
 175          */
 176         if (user_mode(regs))
 177                 __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
 178         else
 179                 __do_kernel_fault(mm, addr, fsr, regs);
 180 }
 181
 182 #ifdef CONFIG_MMU
/*
 * Extra result codes produced by __do_page_fault() in this file:
 *   VM_FAULT_BADMAP    - no usable VMA covers the faulting address
 *   VM_FAULT_BADACCESS - the VMA does not permit the attempted access
 * do_page_fault() turns them into SEGV_MAPERR and SEGV_ACCERR respectively.
 */
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000
 185
 186 /*
 187  * Check that the permissions on the VMA allow for the fault which occurred.
 188  * If we encountered a write fault, we must have write permission, otherwise
 189  * we allow any permission.
 190  */
 191 static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 192 {
 193         unsigned int mask = VM_ACCESS_FLAGS;
 194
 195         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 196                 mask = VM_WRITE;
 197         if (fsr & FSR_LNX_PF)
 198                 mask = VM_EXEC;
 199
 200         return vma->vm_flags & mask ? false : true;
 201 }
 202
 203 static vm_fault_t __kprobes
 204 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 205                 unsigned int flags, struct task_struct *tsk)
 206 {
 207         struct vm_area_struct *vma;
 208         vm_fault_t fault;
 209
 210         vma = find_vma(mm, addr);
 211         fault = VM_FAULT_BADMAP;
 212         if (unlikely(!vma))
 213                 goto out;
 214         if (unlikely(vma->vm_start > addr))
 215                 goto check_stack;
 216
 217         /*
 218          * Ok, we have a good vm_area for this
 219          * memory access, so we can handle it.
 220          */
 221 good_area:
 222         if (access_error(fsr, vma)) {
 223                 fault = VM_FAULT_BADACCESS;
 224                 goto out;
 225         }
 226
 227         return handle_mm_fault(vma, addr & PAGE_MASK, flags);
 228
 229 check_stack:
 230         /* Don't allow expansion below FIRST_USER_ADDRESS */
 231         if (vma->vm_flags & VM_GROWSDOWN &&
 232             addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
 233                 goto good_area;
 234 out:
 235         return fault;
 236 }
 237
 238 static int __kprobes
 239 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 240 {
 241         struct task_struct *tsk;
 242         struct mm_struct *mm;
 243         int sig, code;
 244         vm_fault_t fault;
 245         unsigned int flags = FAULT_FLAG_DEFAULT;
 246
 247         if (kprobe_page_fault(regs, fsr))
 248                 return 0;
 249
 250         tsk = current;
 251         mm  = tsk->mm;
 252
 253         /* Enable interrupts if they were enabled in the parent context. */
 254         if (interrupts_enabled(regs))
 255                 local_irq_enable();
 256
 257         /*
 258          * If we're in an interrupt or have no user
 259          * context, we must not take the fault..
 260          */
 261         if (faulthandler_disabled() || !mm)
 262                 goto no_context;
 263
 264         if (user_mode(regs))
 265                 flags |= FAULT_FLAG_USER;
 266         if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 267                 flags |= FAULT_FLAG_WRITE;
 268
 269         /*
 270          * As per x86, we may deadlock here.  However, since the kernel only
 271          * validly references user space from well defined areas of the code,
 272          * we can bug out early if this is from code which shouldn't.
 273          */
 274         if (!mmap_read_trylock(mm)) {
 275                 if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
 276                         goto no_context;
 277 retry:
 278                 mmap_read_lock(mm);
 279         } else {
 280                 /*
 281                  * The above down_read_trylock() might have succeeded in
 282                  * which case, we'll have missed the might_sleep() from
 283                  * down_read()
 284                  */
 285                 might_sleep();
 286 #ifdef CONFIG_DEBUG_VM
 287                 if (!user_mode(regs) &&
 288                     !search_exception_tables(regs->ARM_pc))
 289                         goto no_context;
 290 #endif
 291         }
 292
 293         fault = __do_page_fault(mm, addr, fsr, flags, tsk);
 294
 295         /* If we need to retry but a fatal signal is pending, handle the
 296          * signal first. We do not need to release the mmap_sem because
 297          * it would already be released in __lock_page_or_retry in
 298          * mm/filemap.c. */
 299         if (fault_signal_pending(fault, regs)) {
 300                 if (!user_mode(regs))
 301                         goto no_context;
 302                 return 0;
 303         }
 304
 305         /*
 306          * Major/minor page fault accounting is only done on the
 307          * initial attempt. If we go through a retry, it is extremely
 308          * likely that the page will be found in page cache at that point.
 309          */
 310
 311         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 312         if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
 313                 if (fault & VM_FAULT_MAJOR) {
 314                         tsk->maj_flt++;
 315                         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
 316                                         regs, addr);
 317                 } else {
 318                         tsk->min_flt++;
 319                         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
 320                                         regs, addr);
 321                 }
 322                 if (fault & VM_FAULT_RETRY) {
 323                         flags |= FAULT_FLAG_TRIED;
 324                         goto retry;
 325                 }
 326         }
 327
 328         mmap_read_unlock(mm);
 329
 330         /*
 331          * Handle the "normal" case first - VM_FAULT_MAJOR
 332          */
 333         if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
 334                 return 0;
 335
 336         /*
 337          * If we are in kernel mode at this point, we
 338          * have no context to handle this fault with.
 339          */
 340         if (!user_mode(regs))
 341                 goto no_context;
 342
 343         if (fault & VM_FAULT_OOM) {
 344                 /*
 345                  * We ran out of memory, call the OOM killer, and return to
 346                  * userspace (which will retry the fault, or kill us if we
 347                  * got oom-killed)
 348                  */
 349                 pagefault_out_of_memory();
 350                 return 0;
 351         }
 352
 353         if (fault & VM_FAULT_SIGBUS) {
 354                 /*
 355                  * We had some memory, but were unable to
 356                  * successfully fix up this page fault.
 357                  */
 358                 sig = SIGBUS;
 359                 code = BUS_ADRERR;
 360         } else {
 361                 /*
 362                  * Something tried to access memory that
 363                  * isn't in our memory map..
 364                  */
 365                 sig = SIGSEGV;
 366                 code = fault == VM_FAULT_BADACCESS ?
 367                         SEGV_ACCERR : SEGV_MAPERR;
 368         }
 369
 370         __do_user_fault(addr, fsr, sig, code, regs);
 371         return 0;
 372
 373 no_context:
 374         __do_kernel_fault(mm, addr, fsr, regs);
 375         return 0;
 376 }
 377 #else                                   /* CONFIG_MMU */
 378 static int
 379 do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 380 {
 381         return 0;
 382 }
 383 #endif                                  /* CONFIG_MMU */
 384
/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task.  If not, we send the process
 * a signal, fix up the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
 402 #ifdef CONFIG_MMU
/*
 * Handle a first level translation fault on 'addr'.
 *
 * Addresses below TASK_SIZE are passed on to do_page_fault().  For higher
 * addresses accessed from kernel mode, the missing entries are copied from
 * init_mm's page tables into the tables returned by cpu_get_pgd().  If
 * init_mm has no entry either, or the access came from user mode, the
 * fault is reported through do_bad_area().  Returns 0 on the paths handled
 * here, or do_page_fault()'s result for addresses below TASK_SIZE.
 */
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        unsigned int index;
        pgd_t *pgd, *pgd_k;
        p4d_t *p4d, *p4d_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        if (addr < TASK_SIZE)
                return do_page_fault(addr, fsr, regs);

        /* User mode has no business touching addresses >= TASK_SIZE. */
        if (user_mode(regs))
                goto bad_area;

        index = pgd_index(addr);

        /* 'pgd' is the entry in the tables in use, 'pgd_k' the one in init_mm. */
        pgd = cpu_get_pgd() + index;
        pgd_k = init_mm.pgd + index;

        p4d = p4d_offset(pgd, addr);
        p4d_k = p4d_offset(pgd_k, addr);

        /* At each level: give up if init_mm has nothing, else copy if missing. */
        if (p4d_none(*p4d_k))
                goto bad_area;
        if (!p4d_present(*p4d))
                set_p4d(p4d, *p4d_k);

        pud = pud_offset(p4d, addr);
        pud_k = pud_offset(p4d_k, addr);

        if (pud_none(*pud_k))
                goto bad_area;
        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
        /*
         * Only one hardware entry per PMD with LPAE.
         */
        index = 0;
#else
        /*
         * On ARM one Linux PGD entry contains two hardware entries (see page
         * tables layout in pgtable.h). We normally guarantee that we always
         * fill both L1 entries. But create_mapping() doesn't follow the rule.
         * It can create individual L1 entries, so here we have to call
         * pmd_none() check for the entry really corresponded to address, not
         * for the first of pair.
         */
        index = (addr >> SECTION_SHIFT) & 1;
#endif
        if (pmd_none(pmd_k[index]))
                goto bad_area;

        copy_pmd(pmd, pmd_k);
        return 0;

bad_area:
        do_bad_area(addr, fsr, regs);
        return 0;
}
 469 #else                                   /* CONFIG_MMU */
 470 static int
 471 do_translation_fault(unsigned long addr, unsigned int fsr,
 472                      struct pt_regs *regs)
 473 {
 474         return 0;
 475 }
 476 #endif                                  /* CONFIG_MMU */
 477
 478 /*
 479  * Some section permission faults need to be handled gracefully.
 480  * They can happen due to a __{get,put}_user during an oops.
 481  */
 482 #ifndef CONFIG_ARM_LPAE
 483 static int
 484 do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 485 {
 486         do_bad_area(addr, fsr, regs);
 487         return 0;
 488 }
 489 #endif /* CONFIG_ARM_LPAE */
 490
 491 /*
 492  * This abort handler always returns "fault".
 493  */
 494 static int
 495 do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 496 {
 497         return 1;
 498 }
 499
/*
 * One entry of the fault dispatch tables (fsr_info[] / ifsr_info[]), which
 * are indexed by the value of fsr_fs().
 */
struct fsr_info {
        /* Handler for this fault type; returns 0 when the fault was handled. */
        int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
        /* Signal passed to arm_notify_die() when the handler returns non-zero. */
        int     sig;
        /* si_code passed along with 'sig'. */
        int     code;
        /* Human readable fault name, printed in the "Unhandled ..." message. */
        const char *name;
};
 506
 507 /* FSR definition */
 508 #ifdef CONFIG_ARM_LPAE
 509 #include "fsr-3level.c"
 510 #else
 511 #include "fsr-2level.c"
 512 #endif
 513
 514 void __init
 515 hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 516                 int sig, int code, const char *name)
 517 {
 518         if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
 519                 BUG();
 520
 521         fsr_info[nr].fn   = fn;
 522         fsr_info[nr].sig  = sig;
 523         fsr_info[nr].code = code;
 524         fsr_info[nr].name = name;
 525 }
 526
 527 /*
 528  * Dispatch a data abort to the relevant handler.
 529  */
 530 asmlinkage void
 531 do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 532 {
 533         const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
 534
 535         if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
 536                 return;
 537
 538         pr_alert("8<--- cut here ---\n");
 539         pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
 540                 inf->name, fsr, addr);
 541         show_pte(KERN_ALERT, current->mm, addr);
 542
 543         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 544                        fsr, 0);
 545 }
 546
 547 void __init
 548 hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
 549                  int sig, int code, const char *name)
 550 {
 551         if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
 552                 BUG();
 553
 554         ifsr_info[nr].fn   = fn;
 555         ifsr_info[nr].sig  = sig;
 556         ifsr_info[nr].code = code;
 557         ifsr_info[nr].name = name;
 558 }
 559
 560 asmlinkage void
 561 do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
 562 {
 563         const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
 564
 565         if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
 566                 return;
 567
 568         pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
 569                 inf->name, ifsr, addr);
 570
 571         arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr,
 572                        ifsr, 0);
 573 }
 574
/*
 * Abort handler to be used only during first unmasking of asynchronous aborts
 * on the boot CPU. This makes sure that the machine will not die if the
 * firmware/bootloader left an imprecise abort pending for us to trip over.
 *
 * Only logs a warning containing the fault status value; 'addr' and 'regs'
 * are not used.  Returns 0 so that do_DataAbort() treats the abort as
 * handled.
 */
static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
                                      struct pt_regs *regs)
{
        pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
                "first unmask, this is most likely caused by a "
                "firmware/bootloader bug.\n", fsr);

        return 0;
}
 589
/*
 * Enable aborts via local_abt_enable() with early_abort_handler temporarily
 * installed for the FSR_FS_AEA fault status, so that an abort taken during
 * the call is only logged.  The entry is set to do_bad afterwards.
 */
void __init early_abt_enable(void)
{
        /* Order matters: install the handler, unmask, then switch to do_bad. */
        fsr_info[FSR_FS_AEA].fn = early_abort_handler;
        local_abt_enable();
        fsr_info[FSR_FS_AEA].fn = do_bad;
}
 596
 597 #ifndef CONFIG_ARM_LPAE
 598 static int __init exceptions_init(void)
 599 {
 600         if (cpu_architecture() >= CPU_ARCH_ARMv6) {
 601                 hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
 602                                 "I-cache maintenance fault");
 603         }
 604
 605         if (cpu_architecture() >= CPU_ARCH_ARMv7) {
 606                 /*
 607                  * TODO: Access flag faults introduced in ARMv6K.
 608                  * Runtime check for 'K' extension is needed
 609                  */
 610                 hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
 611                                 "section access flag fault");
 612                 hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
 613                                 "section access flag fault");
 614         }
 615
 616         return 0;
 617 }
 618
 619 arch_initcall(exceptions_init);
 620 #endif