LIST_HEAD(folio_list);
bool pageout_anon_only_filter;
unsigned int batch_count = 0;
+ int nr;
if (fatal_signal_pending(current))
return -EINTR;
goto huge_unlock;
}
- folio = pfn_folio(pmd_pfn(orig_pmd));
+ folio = pmd_folio(orig_pmd);
/* Do not interfere with other mappings of this folio */
- if (folio_estimated_sharers(folio) != 1)
+ if (folio_likely_mapped_shared(folio))
goto huge_unlock;
if (pageout_anon_only_filter && !folio_test_anon(folio))
return 0;
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
- for (; addr < end; pte++, addr += PAGE_SIZE) {
+ for (; addr < end; pte += nr, addr += nr * PAGE_SIZE) {
+ nr = 1;
ptent = ptep_get(pte);
if (++batch_count == SWAP_CLUSTER_MAX) {
continue;
/*
- * Creating a THP page is expensive so split it only if we
- * are sure it's worth. Split it if we are only owner.
+ * If we encounter a large folio, only split it if it is not
+ * fully mapped within the range we are operating on. Otherwise
+ * leave it as is so that it can be swapped out whole. If we
+ * fail to split a folio, leave it in place and advance to the
+ * next pte in the range.
*/
if (folio_test_large(folio)) {
- int err;
-
- if (folio_estimated_sharers(folio) > 1)
- break;
- if (pageout_anon_only_filter && !folio_test_anon(folio))
- break;
- if (!folio_trylock(folio))
- break;
- folio_get(folio);
- arch_leave_lazy_mmu_mode();
- pte_unmap_unlock(start_pte, ptl);
- start_pte = NULL;
- err = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
- if (err)
- break;
- start_pte = pte =
- pte_offset_map_lock(mm, pmd, addr, &ptl);
- if (!start_pte)
- break;
- arch_enter_lazy_mmu_mode();
- pte--;
- addr -= PAGE_SIZE;
- continue;
+ const fpb_t fpb_flags = FPB_IGNORE_DIRTY |
+ FPB_IGNORE_SOFT_DIRTY;
+ int max_nr = (end - addr) / PAGE_SIZE;
+ bool any_young;
+
+ nr = folio_pte_batch(folio, addr, pte, ptent, max_nr,
+ fpb_flags, NULL, &any_young);
+ if (any_young)
+ ptent = pte_mkyoung(ptent);
+
+ if (nr < folio_nr_pages(folio)) {
+ int err;
+
+ if (folio_likely_mapped_shared(folio))
+ continue;
+ if (pageout_anon_only_filter && !folio_test_anon(folio))
+ continue;
+ if (!folio_trylock(folio))
+ continue;
+ folio_get(folio);
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(start_pte, ptl);
+ start_pte = NULL;
+ err = split_folio(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+ start_pte = pte =
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!start_pte)
+ break;
+ arch_enter_lazy_mmu_mode();
+ if (!err)
+ nr = 0;
+ continue;
+ }
}
/*
* Do not interfere with other mappings of this folio and
- * non-LRU folio.
+ * non-LRU folio. If we have a large folio at this point, we
+ * know it is fully mapped so if its mapcount is the same as its
+ * number of pages, it must be exclusive.
*/
- if (!folio_test_lru(folio) || folio_mapcount(folio) != 1)
+ if (!folio_test_lru(folio) ||
+ folio_mapcount(folio) != folio_nr_pages(folio))
continue;
if (pageout_anon_only_filter && !folio_test_anon(folio))
continue;
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
-
if (!pageout && pte_young(ptent)) {
- ptent = ptep_get_and_clear_full(mm, addr, pte,
- tlb->fullmm);
- ptent = pte_mkold(ptent);
- set_pte_at(mm, addr, pte, ptent);
- tlb_remove_tlb_entry(tlb, pte, addr);
+ mkold_ptes(vma, addr, pte, nr);
+ tlb_remove_tlb_entries(tlb, pte, nr, addr);
}
/*
struct folio *folio;
int nr_swap = 0;
unsigned long next;
+ int nr, max_nr;
next = pmd_addr_end(addr, end);
if (pmd_trans_huge(*pmd))
return 0;
flush_tlb_batched_pending(mm);
arch_enter_lazy_mmu_mode();
- for (; addr != end; pte++, addr += PAGE_SIZE) {
+ for (; addr != end; pte += nr, addr += PAGE_SIZE * nr) {
+ nr = 1;
ptent = ptep_get(pte);
if (pte_none(ptent))
entry = pte_to_swp_entry(ptent);
if (!non_swap_entry(entry)) {
- nr_swap--;
- free_swap_and_cache(entry);
- pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
+ max_nr = (end - addr) / PAGE_SIZE;
+ nr = swap_pte_batch(pte, max_nr, ptent);
+ nr_swap -= nr;
+ free_swap_and_cache_nr(entry, nr);
+ clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm);
} else if (is_hwpoison_entry(entry) ||
is_poisoned_swp_entry(entry)) {
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
if (folio_test_large(folio)) {
int err;
- if (folio_estimated_sharers(folio) != 1)
+ if (folio_likely_mapped_shared(folio))
break;
if (!folio_trylock(folio))
break;
return -EINVAL;
}
-static long madvise_populate(struct vm_area_struct *vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end,
- int behavior)
+static long madvise_populate(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int behavior)
{
const bool write = behavior == MADV_POPULATE_WRITE;
- struct mm_struct *mm = vma->vm_mm;
- unsigned long tmp_end;
int locked = 1;
long pages;
- *prev = vma;
-
while (start < end) {
- /*
- * We might have temporarily dropped the lock. For example,
- * our VMA might have been split.
- */
- if (!vma || start >= vma->vm_end) {
- vma = vma_lookup(mm, start);
- if (!vma)
- return -ENOMEM;
- }
-
- tmp_end = min_t(unsigned long, end, vma->vm_end);
/* Populate (prefault) page tables readable/writable. */
- pages = faultin_vma_page_range(vma, start, tmp_end, write,
- &locked);
+ pages = faultin_page_range(mm, start, end, write, &locked);
if (!locked) {
mmap_read_lock(mm);
locked = 1;
- *prev = NULL;
- vma = NULL;
}
if (pages < 0) {
switch (pages) {
pr_warn_once("%s: unhandled return value: %ld\n",
__func__, pages);
fallthrough;
- case -ENOMEM:
+ case -ENOMEM: /* No VMA or out of memory. */
return -ENOMEM;
}
}
case MADV_DONTNEED:
case MADV_DONTNEED_LOCKED:
return madvise_dontneed_free(vma, prev, start, end, behavior);
- case MADV_POPULATE_READ:
- case MADV_POPULATE_WRITE:
- return madvise_populate(vma, prev, start, end, behavior);
case MADV_NORMAL:
new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
break;
end = start + len;
blk_start_plug(&plug);
- error = madvise_walk_vmas(mm, start, end, behavior,
- madvise_vma_behavior);
+ switch (behavior) {
+ case MADV_POPULATE_READ:
+ case MADV_POPULATE_WRITE:
+ error = madvise_populate(mm, start, end, behavior);
+ break;
+ default:
+ error = madvise_walk_vmas(mm, start, end, behavior,
+ madvise_vma_behavior);
+ break;
+ }
blk_finish_plug(&plug);
if (write)
mmap_write_unlock(mm);