include/linux/huge_mm.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 #ifndef _LINUX_HUGE_MM_H
   3 #define _LINUX_HUGE_MM_H
   4
   5 #include <linux/sched/coredump.h>
   6 #include <linux/mm_types.h>
   7
   8 #include <linux/fs.h> /* only for vma_is_dax() */
   9
/*
 * Huge PMD/PUD entry points. These are declarations only; the definitions
 * are not part of this header.
 */
vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
                  struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
void huge_pmd_set_accessed(struct vm_fault *vmf);
int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
                  struct vm_area_struct *vma);
  18
/*
 * huge_pud_set_accessed() has an out-of-line definition only when
 * CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is set; otherwise it is an
 * empty inline stub.
 */
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif
  26
/*
 * Further huge PMD/PUD operations (write-protect fault, madvise free, zap,
 * move, protection change, pfn insertion). Declarations only; the
 * definitions are not part of this header.
 */
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                           pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr);
int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
                 unsigned long addr);
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                   unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                    pmd_t *pmd, unsigned long addr, pgprot_t newprot,
                    unsigned long cp_flags);

vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
  42
/*
 * Bit indices into transparent_hugepage_flags. Users in this header test a
 * flag with (1 << FLAG), so the enumerator values are bit positions, not
 * masks.
 */
enum transparent_hugepage_flag {
        TRANSPARENT_HUGEPAGE_UNSUPPORTED,
        TRANSPARENT_HUGEPAGE_FLAG,
        TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
        TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
};
  54
/* Forward declarations: only pointers to these types are used below. */
struct kobject;
struct kobj_attribute;

/*
 * Store/show helpers that take the transparent_hugepage_flag they operate
 * on as an extra argument. Declarations only.
 */
ssize_t single_hugepage_flag_store(struct kobject *kobj,
                                   struct kobj_attribute *attr,
                                   const char *buf, size_t count,
                                   enum transparent_hugepage_flag flag);
ssize_t single_hugepage_flag_show(struct kobject *kobj,
                                  struct kobj_attribute *attr, char *buf,
                                  enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
  66
/*
 * Mask of all large folio orders supported for anonymous THP; all orders up to
 * and including PMD_ORDER, except order-0 (which is not "huge") and order-1
 * (which is a limitation of the THP implementation).
 */
#define THP_ORDERS_ALL_ANON     ((BIT(PMD_ORDER + 1) - 1) & ~(BIT(0) | BIT(1)))

/*
 * Mask of all large folio orders supported for file THP: only PMD_ORDER and
 * PUD_ORDER.
 */
#define THP_ORDERS_ALL_FILE     (BIT(PMD_ORDER) | BIT(PUD_ORDER))

/*
 * Mask of all large folio orders supported for THP: the union of the anon
 * and file masks.
 */
#define THP_ORDERS_ALL          (THP_ORDERS_ALL_ANON | THP_ORDERS_ALL_FILE)
  83
/* Flag bits for the tva_flags argument of thp_vma_allowable_orders(). */
#define TVA_SMAPS               (1 << 0)        /* Will be used for procfs */
#define TVA_IN_PF               (1 << 1)        /* Page fault handler */
#define TVA_ENFORCE_SYSFS       (1 << 2)        /* Obey sysfs configuration */

/*
 * Single-order form of thp_vma_allowable_orders(): queries only BIT(order)
 * and normalises the result to 0 or 1.
 */
#define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
        (!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
  90
/*
 * Huge page geometry. With CONFIG_PGTABLE_HAS_HUGE_LEAVES the shifts are the
 * PMD/PUD shifts; without it, any expansion of HPAGE_PMD_SHIFT or
 * HPAGE_PUD_SHIFT hits BUILD_BUG(). All other HPAGE_* macros are derived
 * from the shifts, so they inherit that behaviour.
 */
#ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PUD_SHIFT PUD_SHIFT
#else
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; })
#endif

/* PMD-sized huge page: order, number of base pages, address mask, byte size. */
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
#define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)
#define HPAGE_PMD_MASK  (~(HPAGE_PMD_SIZE - 1))
#define HPAGE_PMD_SIZE  ((1UL) << HPAGE_PMD_SHIFT)

/* PUD-sized huge page: order, number of base pages, address mask, byte size. */
#define HPAGE_PUD_ORDER (HPAGE_PUD_SHIFT-PAGE_SHIFT)
#define HPAGE_PUD_NR (1<<HPAGE_PUD_ORDER)
#define HPAGE_PUD_MASK  (~(HPAGE_PUD_SIZE - 1))
#define HPAGE_PUD_SIZE  ((1UL) << HPAGE_PUD_SHIFT)
 108
 109 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 110
/*
 * Global THP state, defined outside this header.
 * transparent_hugepage_flags is indexed by enum transparent_hugepage_flag;
 * the huge_anon_orders_* words are order bitmasks that
 * thp_vma_allowable_orders() combines and applies to the requested orders.
 */
extern unsigned long transparent_hugepage_flags;
extern unsigned long huge_anon_orders_always;
extern unsigned long huge_anon_orders_madvise;
extern unsigned long huge_anon_orders_inherit;
 115
 116 static inline bool hugepage_global_enabled(void)
 117 {
 118         return transparent_hugepage_flags &
 119                         ((1<<TRANSPARENT_HUGEPAGE_FLAG) |
 120                         (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG));
 121 }
 122
 123 static inline bool hugepage_global_always(void)
 124 {
 125         return transparent_hugepage_flags &
 126                         (1<<TRANSPARENT_HUGEPAGE_FLAG);
 127 }
 128
 129 static inline bool hugepage_flags_enabled(void)
 130 {
 131         /*
 132          * We cover both the anon and the file-backed case here; we must return
 133          * true if globally enabled, even when all anon sizes are set to never.
 134          * So we don't need to look at huge_anon_orders_inherit.
 135          */
 136         return hugepage_global_enabled() ||
 137                huge_anon_orders_always ||
 138                huge_anon_orders_madvise;
 139 }
 140
 141 static inline int highest_order(unsigned long orders)
 142 {
 143         return fls_long(orders) - 1;
 144 }
 145
 146 static inline int next_order(unsigned long *orders, int prev)
 147 {
 148         *orders &= ~BIT(prev);
 149         return highest_order(*orders);
 150 }
 151
 152 /*
 153  * Do the below checks:
 154  *   - For file vma, check if the linear page offset of vma is
 155  *     order-aligned within the file.  The hugepage is
 156  *     guaranteed to be order-aligned within the file, but we must
 157  *     check that the order-aligned addresses in the VMA map to
 158  *     order-aligned offsets within the file, else the hugepage will
 159  *     not be mappable.
 160  *   - For all vmas, check if the haddr is in an aligned hugepage
 161  *     area.
 162  */
 163 static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
 164                 unsigned long addr, int order)
 165 {
 166         unsigned long hpage_size = PAGE_SIZE << order;
 167         unsigned long haddr;
 168
 169         /* Don't have to check pgoff for anonymous vma */
 170         if (!vma_is_anonymous(vma)) {
 171                 if (!IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
 172                                 hpage_size >> PAGE_SHIFT))
 173                         return false;
 174         }
 175
 176         haddr = ALIGN_DOWN(addr, hpage_size);
 177
 178         if (haddr < vma->vm_start || haddr + hpage_size > vma->vm_end)
 179                 return false;
 180         return true;
 181 }
 182
 183 /*
 184  * Filter the bitfield of input orders to the ones suitable for use in the vma.
 185  * See thp_vma_suitable_order().
 186  * All orders that pass the checks are returned as a bitfield.
 187  */
 188 static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
 189                 unsigned long addr, unsigned long orders)
 190 {
 191         int order;
 192
 193         /*
 194          * Iterate over orders, highest to lowest, removing orders that don't
 195          * meet alignment requirements from the set. Exit loop at first order
 196          * that meets requirements, since all lower orders must also meet
 197          * requirements.
 198          */
 199
 200         order = highest_order(orders);
 201
 202         while (orders) {
 203                 if (thp_vma_suitable_order(vma, addr, order))
 204                         break;
 205                 order = next_order(&orders, order);
 206         }
 207
 208         return orders;
 209 }
 210
 211 static inline bool file_thp_enabled(struct vm_area_struct *vma)
 212 {
 213         struct inode *inode;
 214
 215         if (!vma->vm_file)
 216                 return false;
 217
 218         inode = vma->vm_file->f_inode;
 219
 220         return (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS)) &&
 221                !inode_is_open_for_write(inode) && S_ISREG(inode->i_mode);
 222 }
 223
/*
 * Out-of-line part of thp_vma_allowable_orders(); takes the same arguments.
 * Declaration only.
 */
unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
                                         unsigned long vm_flags,
                                         unsigned long tva_flags,
                                         unsigned long orders);
 228
 229 /**
 230  * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
 231  * @vma:  the vm area to check
 232  * @vm_flags: use these vm_flags instead of vma->vm_flags
 233  * @tva_flags: Which TVA flags to honour
 234  * @orders: bitfield of all orders to consider
 235  *
 236  * Calculates the intersection of the requested hugepage orders and the allowed
 237  * hugepage orders for the provided vma. Permitted orders are encoded as a set
 238  * bit at the corresponding bit position (bit-2 corresponds to order-2, bit-3
 239  * corresponds to order-3, etc). Order-0 is never considered a hugepage order.
 240  *
 241  * Return: bitfield of orders allowed for hugepage in the vma. 0 if no hugepage
 242  * orders are allowed.
 243  */
 244 static inline
 245 unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 246                                        unsigned long vm_flags,
 247                                        unsigned long tva_flags,
 248                                        unsigned long orders)
 249 {
 250         /* Optimization to check if required orders are enabled early. */
 251         if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
 252                 unsigned long mask = READ_ONCE(huge_anon_orders_always);
 253
 254                 if (vm_flags & VM_HUGEPAGE)
 255                         mask |= READ_ONCE(huge_anon_orders_madvise);
 256                 if (hugepage_global_always() ||
 257                     ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
 258                         mask |= READ_ONCE(huge_anon_orders_inherit);
 259
 260                 orders &= mask;
 261                 if (!orders)
 262                         return 0;
 263         }
 264
 265         return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
 266 }
 267
/*
 * Per-order counter identifiers. __MTHP_STAT_COUNT is not a counter; it is
 * the number of items and sizes the second dimension of mthp_stat.stats.
 */
enum mthp_stat_item {
        MTHP_STAT_ANON_FAULT_ALLOC,
        MTHP_STAT_ANON_FAULT_FALLBACK,
        MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
        MTHP_STAT_ANON_SWPOUT,
        MTHP_STAT_ANON_SWPOUT_FALLBACK,
        __MTHP_STAT_COUNT
};

/* Counter table indexed as stats[order][item]. */
struct mthp_stat {
        unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
};

/* One counter table per CPU; updated by count_mthp_stat(). */
DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
 282
 283 static inline void count_mthp_stat(int order, enum mthp_stat_item item)
 284 {
 285         if (order <= 0 || order > PMD_ORDER)
 286                 return;
 287
 288         this_cpu_inc(mthp_stats.stats[order][item]);
 289 }
 290
/*
 * Non-zero when TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG is set in
 * transparent_hugepage_flags. The value is the masked word, not a bool.
 */
#define transparent_hugepage_use_zero_page()                            \
        (transparent_hugepage_flags &                                   \
         (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
 294
/*
 * Unmapped-area lookup and folio/page split entry points. Declarations
 * only; the definitions are not part of this header.
 */
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags);
unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
                unsigned long len, unsigned long pgoff, unsigned long flags,
                vm_flags_t vm_flags);

bool can_split_folio(struct folio *folio, int *pextra_pins);
int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
                unsigned int new_order);
 304 static inline int split_huge_page(struct page *page)
 305 {
 306         return split_huge_page_to_list_to_order(page, NULL, 0);
 307 }
/* Declarations only; the definitions are not part of this header. */
void deferred_split_folio(struct folio *folio);

/* Called by the split_huge_pmd() macro with freeze == false, folio == NULL. */
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio);
 312
 313 #define split_huge_pmd(__vma, __pmd, __address)                         \
 314         do {                                                            \
 315                 pmd_t *____pmd = (__pmd);                               \
 316                 if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)   \
 317                                         || pmd_devmap(*____pmd))        \
 318                         __split_huge_pmd(__vma, __pmd, __address,       \
 319                                                 false, NULL);           \
 320         }  while (0)
 321
 322
/* Declarations only; the definitions are not part of this header. */
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct folio *folio);

/* Called by the split_huge_pud() macro. */
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
                unsigned long address);
 328
 329 #define split_huge_pud(__vma, __pud, __address)                         \
 330         do {                                                            \
 331                 pud_t *____pud = (__pud);                               \
 332                 if (pud_trans_huge(*____pud)                            \
 333                                         || pud_devmap(*____pud))        \
 334                         __split_huge_pud(__vma, __pud, __address);      \
 335         }  while (0)
 336
/*
 * madvise, vma adjustment and huge-entry locking entry points. Declarations
 * only; the definitions are not part of this header.
 */
int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
                     int advice);
int madvise_collapse(struct vm_area_struct *vma,
                     struct vm_area_struct **prev,
                     unsigned long start, unsigned long end);
void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
                           unsigned long end, long adjust_next);
/* Slow paths behind pmd_trans_huge_lock() and pud_trans_huge_lock(). */
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
 346
 347 static inline int is_swap_pmd(pmd_t pmd)
 348 {
 349         return !pmd_none(pmd) && !pmd_present(pmd);
 350 }
 351
 352 /* mmap_lock must be held on entry */
 353 static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
 354                 struct vm_area_struct *vma)
 355 {
 356         if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
 357                 return __pmd_trans_huge_lock(pmd, vma);
 358         else
 359                 return NULL;
 360 }
 361 static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 362                 struct vm_area_struct *vma)
 363 {
 364         if (pud_trans_huge(*pud) || pud_devmap(*pud))
 365                 return __pud_trans_huge_lock(pud, vma);
 366         else
 367                 return NULL;
 368 }
 369
 370 /**
 371  * folio_test_pmd_mappable - Can we map this folio with a PMD?
 372  * @folio: The folio to test
 373  */
 374 static inline bool folio_test_pmd_mappable(struct folio *folio)
 375 {
 376         return folio_order(folio) >= HPAGE_PMD_ORDER;
 377 }
 378
/* Declarations only; the definitions are not part of this header. */
struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
                pmd_t *pmd, int flags, struct dev_pagemap **pgmap);

vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);

/*
 * Huge zero folio and its pfn; compared against by is_huge_zero_folio() and
 * is_huge_zero_pmd().
 */
extern struct folio *huge_zero_folio;
extern unsigned long huge_zero_pfn;
 386
 387 static inline bool is_huge_zero_folio(const struct folio *folio)
 388 {
 389         return READ_ONCE(huge_zero_folio) == folio;
 390 }
 391
 392 static inline bool is_huge_zero_pmd(pmd_t pmd)
 393 {
 394         return pmd_present(pmd) && READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd);
 395 }
 396
/* Always false: no pud is ever reported as a huge zero pud here. */
static inline bool is_huge_zero_pud(pud_t pud)
{
        return false;
}
 401
/* Declarations only; the definitions are not part of this header. */
struct folio *mm_get_huge_zero_folio(struct mm_struct *mm);
void mm_put_huge_zero_folio(struct mm_struct *mm);

/* Build a pmd for @page with @prot via mk_pmd(), then mark it huge. */
#define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
 406
 407 static inline bool thp_migration_supported(void)
 408 {
 409         return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 410 }
 411
 412 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 413
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE stubs for the order queries: no folio is
 * PMD-mappable and no order is ever suitable or allowed.
 */
static inline bool folio_test_pmd_mappable(struct folio *folio)
{
        return false;
}

static inline bool thp_vma_suitable_order(struct vm_area_struct *vma,
                unsigned long addr, int order)
{
        return false;
}

static inline unsigned long thp_vma_suitable_orders(struct vm_area_struct *vma,
                unsigned long addr, unsigned long orders)
{
        return 0;
}

static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
                                        unsigned long vm_flags,
                                        unsigned long tva_flags,
                                        unsigned long orders)
{
        return 0;
}
 438
/* Without THP the flags word is the constant 0UL. */
#define transparent_hugepage_flags 0UL

/* Without THP this name expands to NULL rather than to a function. */
#define thp_get_unmapped_area   NULL

/* Without THP this stub ignores its arguments and returns 0. */
static inline unsigned long
thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
                              unsigned long len, unsigned long pgoff,
                              unsigned long flags, vm_flags_t vm_flags)
{
        return 0;
}
 450
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE split stubs: can_split_folio() is always
 * false, the split functions return 0 without doing anything, and the
 * remaining helpers and macros are empty.
 */
static inline bool
can_split_folio(struct folio *folio, int *pextra_pins)
{
        return false;
}
static inline int
split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
                unsigned int new_order)
{
        return 0;
}
static inline int split_huge_page(struct page *page)
{
        return 0;
}
static inline void deferred_split_folio(struct folio *folio) {}
/* Expands to an empty statement; the arguments are not evaluated. */
#define split_huge_pmd(__vma, __pmd, __address) \
        do { } while (0)

static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze, struct folio *folio) {}
static inline void split_huge_pmd_address(struct vm_area_struct *vma,
                unsigned long address, bool freeze, struct folio *folio) {}

/* Expands to an empty statement; the arguments are not evaluated. */
#define split_huge_pud(__vma, __pmd, __address) \
        do { } while (0)
 477
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE stubs: both madvise entry points fail with
 * -EINVAL, and vma_adjust_trans_huge() does nothing.
 */
static inline int hugepage_madvise(struct vm_area_struct *vma,
                                   unsigned long *vm_flags, int advice)
{
        return -EINVAL;
}

static inline int madvise_collapse(struct vm_area_struct *vma,
                                   struct vm_area_struct **prev,
                                   unsigned long start, unsigned long end)
{
        return -EINVAL;
}

static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
                                         unsigned long start,
                                         unsigned long end,
                                         long adjust_next)
{
}
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE stubs: no pmd is a swap pmd, the huge-entry
 * lock helpers always return NULL, and the NUMA huge pmd fault handler
 * returns 0.
 */
static inline int is_swap_pmd(pmd_t pmd)
{
        return 0;
}
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
                struct vm_area_struct *vma)
{
        return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
                struct vm_area_struct *vma)
{
        return NULL;
}

static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
{
        return 0;
}
 516
/*
 * !CONFIG_TRANSPARENT_HUGEPAGE stubs: nothing is ever the huge zero
 * folio/pmd/pud.
 */
static inline bool is_huge_zero_folio(const struct folio *folio)
{
        return false;
}

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
        return false;
}

static inline bool is_huge_zero_pud(pud_t pud)
{
        return false;
}
 531
 532 static inline void mm_put_huge_zero_folio(struct mm_struct *mm)
 533 {
 534         return;
 535 }
 536
/* !CONFIG_TRANSPARENT_HUGEPAGE stub: never finds a page. */
static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
        unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
{
        return NULL;
}

/* !CONFIG_TRANSPARENT_HUGEPAGE stub: THP migration is never supported. */
static inline bool thp_migration_supported(void)
{
        return false;
}
 547 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 548
 549 static inline int split_folio_to_list_to_order(struct folio *folio,
 550                 struct list_head *list, int new_order)
 551 {
 552         return split_huge_page_to_list_to_order(&folio->page, list, new_order);
 553 }
 554
 555 static inline int split_folio_to_order(struct folio *folio, int new_order)
 556 {
 557         return split_folio_to_list_to_order(folio, NULL, new_order);
 558 }
 559
/* Convenience forms of the two helpers above with a new order of 0. */
#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
#define split_folio(f) split_folio_to_order(f, 0)
 562
 563 #endif /* _LINUX_HUGE_MM_H */