X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=include%2Flinux%2Fmm.h;h=3c2f6b452586f233dfdbe1f0ce1580733430f2ee;hb=c8070b78751955e59b42457b974bea4a4fe00187;hp=98da268b834a37c1eac55113327bbbf1a38a6049;hpb=513f17f8d6b67563d977c730d50bc0db6ea6e1b0;p=linux-block.git diff --git a/include/linux/mm.h b/include/linux/mm.h index 98da268b834a..3c2f6b452586 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -29,6 +29,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -38,6 +39,7 @@ struct pt_regs; extern int sysctl_page_lock_unfairness; +void mm_core_init(void); void init_mm_internals(void); #ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */ @@ -96,17 +98,6 @@ extern int mmap_rnd_compat_bits __read_mostly; #include #include -/* - * Architectures that support memory tagging (assigning tags to memory regions, - * embedding these tags into addresses that point to these memory regions, and - * checking that the memory and the pointer tags match on memory accesses) - * redefine this macro to strip tags from pointers. - * It's defined as noop for architectures that don't support memory tagging. - */ -#ifndef untagged_addr -#define untagged_addr(addr) (addr) -#endif - #ifndef __pa_symbol #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #endif @@ -256,6 +247,8 @@ void setup_initial_init_mm(void *start_code, void *end_code, struct vm_area_struct *vm_area_alloc(struct mm_struct *); struct vm_area_struct *vm_area_dup(struct vm_area_struct *); void vm_area_free(struct vm_area_struct *); +/* Use only if VMA has no other users */ +void __vm_area_free(struct vm_area_struct *vma); #ifndef CONFIG_MMU extern struct rb_root nommu_region_tree; @@ -478,7 +471,8 @@ static inline bool fault_flag_allow_retry_first(enum fault_flag flags) { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ - { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ + { FAULT_FLAG_VMA_LOCK, "VMA_LOCK" } /* * vm_fault is filled by the pagefault handler and passed to the vma's @@ -623,6 +617,131 @@ struct vm_operations_struct { unsigned long addr); }; +#ifdef CONFIG_NUMA_BALANCING +static inline void vma_numab_state_init(struct vm_area_struct *vma) +{ + vma->numab_state = NULL; +} +static inline void vma_numab_state_free(struct vm_area_struct *vma) +{ + kfree(vma->numab_state); +} +#else +static inline void vma_numab_state_init(struct vm_area_struct *vma) {} +static inline void vma_numab_state_free(struct vm_area_struct *vma) {} +#endif /* CONFIG_NUMA_BALANCING */ + +#ifdef CONFIG_PER_VMA_LOCK +/* + * Try to read-lock a vma. The function is allowed to occasionally yield false + * locked result to avoid performance overhead, in which case we fall back to + * using mmap_lock. The function should never yield false unlocked result. + */ +static inline bool vma_start_read(struct vm_area_struct *vma) +{ + /* Check before locking. A race might cause false locked result. */ + if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq)) + return false; + + if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) + return false; + + /* + * Overflow might produce false locked result. + * False unlocked result is impossible because we modify and check + * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq + * modification invalidates all existing locks. + */ + if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) { + up_read(&vma->vm_lock->lock); + return false; + } + return true; +} + +static inline void vma_end_read(struct vm_area_struct *vma) +{ + rcu_read_lock(); /* keeps vma alive till the end of up_read */ + up_read(&vma->vm_lock->lock); + rcu_read_unlock(); +} + +static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) +{ + mmap_assert_write_locked(vma->vm_mm); + + /* + * current task is holding mmap_write_lock, both vma->vm_lock_seq and + * mm->mm_lock_seq can't be concurrently modified. + */ + *mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq); + return (vma->vm_lock_seq == *mm_lock_seq); +} + +static inline void vma_start_write(struct vm_area_struct *vma) +{ + int mm_lock_seq; + + if (__is_vma_write_locked(vma, &mm_lock_seq)) + return; + + down_write(&vma->vm_lock->lock); + vma->vm_lock_seq = mm_lock_seq; + up_write(&vma->vm_lock->lock); +} + +static inline bool vma_try_start_write(struct vm_area_struct *vma) +{ + int mm_lock_seq; + + if (__is_vma_write_locked(vma, &mm_lock_seq)) + return true; + + if (!down_write_trylock(&vma->vm_lock->lock)) + return false; + + vma->vm_lock_seq = mm_lock_seq; + up_write(&vma->vm_lock->lock); + return true; +} + +static inline void vma_assert_write_locked(struct vm_area_struct *vma) +{ + int mm_lock_seq; + + VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); +} + +static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +{ + /* When detaching vma should be write-locked */ + if (detached) + vma_assert_write_locked(vma); + vma->detached = detached; +} + +struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, + unsigned long address); + +#else /* CONFIG_PER_VMA_LOCK */ + +static inline void vma_init_lock(struct vm_area_struct *vma) {} +static inline bool vma_start_read(struct vm_area_struct *vma) + { return false; } +static inline void vma_end_read(struct vm_area_struct *vma) {} +static inline void vma_start_write(struct vm_area_struct *vma) {} +static inline bool vma_try_start_write(struct vm_area_struct *vma) + { return true; } +static inline void vma_assert_write_locked(struct vm_area_struct *vma) {} +static inline void vma_mark_detached(struct vm_area_struct *vma, + bool detached) {} + +#endif /* CONFIG_PER_VMA_LOCK */ + +/* + * WARNING: vma_init does not initialize vma->vm_lock. + * Use vm_area_alloc()/vm_area_free() if vma needs locking. + */ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) { static const struct vm_operations_struct dummy_vm_ops = {}; @@ -631,6 +750,8 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_mm = mm; vma->vm_ops = &dummy_vm_ops; INIT_LIST_HEAD(&vma->anon_vma_chain); + vma_mark_detached(vma, false); + vma_numab_state_init(vma); } /* Use when VMA is not part of the VMA tree and needs no locking */ @@ -644,28 +765,28 @@ static inline void vm_flags_init(struct vm_area_struct *vma, static inline void vm_flags_reset(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); vm_flags_init(vma, flags); } static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |= flags; } static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) &= ~flags; } @@ -686,7 +807,7 @@ static inline void __vm_flags_mod(struct vm_area_struct *vma, static inline void vm_flags_mod(struct vm_area_struct *vma, vm_flags_t set, vm_flags_t clear) { - mmap_assert_write_locked(vma->vm_mm); + vma_start_write(vma); __vm_flags_mod(vma, set, clear); } @@ -1554,6 +1675,16 @@ static inline int xchg_page_access_time(struct page *page, int time) last_time = page_cpupid_xchg_last(page, time >> PAGE_ACCESS_TIME_BUCKETS); return last_time << PAGE_ACCESS_TIME_BUCKETS; } + +static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) +{ + unsigned int pid_bit; + + pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG)); + if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) { + __set_bit(pid_bit, &vma->numab_state->access_pids[1]); + } +} #else /* !CONFIG_NUMA_BALANCING */ static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { @@ -1603,6 +1734,10 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) { return false; } + +static inline void vma_set_access_pid_bit(struct vm_area_struct *vma) +{ +} #endif /* CONFIG_NUMA_BALANCING */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) @@ -1775,6 +1910,28 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, return page_maybe_dma_pinned(page); } +/** + * is_zero_page - Query if a page is a zero page + * @page: The page to query + * + * This returns true if @page is one of the permanent zero pages. + */ +static inline bool is_zero_page(const struct page *page) +{ + return is_zero_pfn(page_to_pfn(page)); +} + +/** + * is_zero_folio - Query if a folio is a zero page + * @folio: The folio to query + * + * This returns true if @folio is one of the permanent zero pages. + */ +static inline bool is_zero_folio(const struct folio *folio) +{ + return is_zero_page(&folio->page); +} + /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */ #ifdef CONFIG_MIGRATION static inline bool is_longterm_pinnable_page(struct page *page) @@ -1785,8 +1942,8 @@ static inline bool is_longterm_pinnable_page(struct page *page) if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE) return false; #endif - /* The zero page may always be pinned */ - if (is_zero_pfn(page_to_pfn(page))) + /* The zero page can be "pinned" but gets special handling. */ + if (is_zero_page(page)) return true; /* Coherent device memory must always allow eviction. */ @@ -2636,12 +2793,6 @@ static inline bool ptlock_init(struct page *page) { return true; } static inline void ptlock_free(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline void pgtable_init(void) -{ - ptlock_cache_init(); - pgtable_cache_init(); -} - static inline bool pgtable_pte_page_ctor(struct page *page) { if (!ptlock_init(page)) @@ -2785,7 +2936,6 @@ extern unsigned long free_reserved_area(void *start, void *end, int poison, const char *s); extern void adjust_managed_page_count(struct page *page, long count); -extern void mem_init_print_info(void); extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); @@ -2896,7 +3046,6 @@ extern void setup_per_cpu_pageset(void); extern int min_free_kbytes; extern int watermark_boost_factor; extern int watermark_scale_factor; -extern bool arch_has_descending_max_zone_pfns(void); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; @@ -3185,8 +3334,6 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); -vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, - pfn_t pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); @@ -3256,7 +3403,6 @@ extern int apply_to_existing_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); -extern void __init init_mem_debugging_and_hardening(void); #ifdef CONFIG_PAGE_POISONING extern void __kernel_poison_pages(struct page *page, int numpages); extern void __kernel_unpoison_pages(struct page *page, int numpages); @@ -3425,6 +3571,22 @@ void vmemmap_populate_print_last(void); void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap); #endif + +#ifdef CONFIG_ARCH_WANT_OPTIMIZE_VMEMMAP +static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) +{ + return is_power_of_2(sizeof(struct page)) && + pgmap && (pgmap_vmemmap_nr(pgmap) > 1) && !altmap; +} +#else +static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap, + struct dev_pagemap *pgmap) +{ + return false; +} +#endif + void register_page_bootmem_memmap(unsigned long section_nr, struct page *map, unsigned long nr_pages); @@ -3451,6 +3613,7 @@ extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); +struct task_struct *task_early_kill(struct task_struct *tsk, int force_early); #else static inline void memory_failure_queue(unsigned long pfn, int flags) { @@ -3471,6 +3634,12 @@ static inline void num_poisoned_pages_sub(unsigned long pfn, long i) } #endif +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_KSM) +void add_to_kill_ksm(struct task_struct *tsk, struct page *p, + struct vm_area_struct *vma, struct list_head *to_kill, + unsigned long ksm_addr); +#endif + #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) extern void memblk_nr_poison_inc(unsigned long pfn); extern void memblk_nr_poison_sub(unsigned long pfn, long i); @@ -3540,14 +3709,12 @@ extern const struct attribute_group memory_failure_attr_group; extern void clear_huge_page(struct page *page, unsigned long addr_hint, unsigned int pages_per_huge_page); -extern void copy_user_huge_page(struct page *dst, struct page *src, - unsigned long addr_hint, - struct vm_area_struct *vma, - unsigned int pages_per_huge_page); -extern long copy_huge_page_from_user(struct page *dst_page, - const void __user *usr_src, - unsigned int pages_per_huge_page, - bool allow_pagefault); +int copy_user_large_folio(struct folio *dst, struct folio *src, + unsigned long addr_hint, + struct vm_area_struct *vma); +long copy_folio_from_user(struct folio *dst_folio, + const void __user *usr_src, + bool allow_pagefault); /** * vma_is_special_huge - Are transhuge page-table entries considered special?