Merge tag 'char-misc-6.10-rc1-fix' of git://git.kernel.org/pub/scm/linux/kernel/git...
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 14d39f34d3367fbb9683683be7e6ed150f97fae2..2e22ce5675ca1af9d4e82ffe1096af53dee56f75 100644
@@ -54,6 +54,7 @@
 #include <linux/khugepaged.h>
 #include <linux/delayacct.h>
 #include <linux/cacheinfo.h>
+#include <linux/pgalloc_tag.h>
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
@@ -206,24 +207,6 @@ EXPORT_SYMBOL(node_states);
 
 gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
 
-/*
- * A cached value of the page's pageblock's migratetype, used when the page is
- * put on a pcplist. Used to avoid the pageblock migratetype lookup when
- * freeing from pcplists in most cases, at the cost of possibly becoming stale.
- * Also the migratetype set in the page does not necessarily match the pcplist
- * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any
- * other index - this ensures that it will be put on the correct CMA freelist.
- */
-static inline int get_pcppage_migratetype(struct page *page)
-{
-       return page->index;
-}
-
-static inline void set_pcppage_migratetype(struct page *page, int migratetype)
-{
-       page->index = migratetype;
-}
-
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 unsigned int pageblock_order __read_mostly;
 #endif
@@ -332,7 +315,7 @@ static inline bool deferred_pages_enabled(void)
 static bool __ref
 _deferred_grow_zone(struct zone *zone, unsigned int order)
 {
-       return deferred_grow_zone(zone, order);
+       return deferred_grow_zone(zone, order);
 }
 #else
 static inline bool deferred_pages_enabled(void)
@@ -523,7 +506,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (order > PAGE_ALLOC_COSTLY_ORDER) {
-               VM_BUG_ON(order != pageblock_order);
+               VM_BUG_ON(order != HPAGE_PMD_ORDER);
                return NR_LOWORDER_PCP_LISTS;
        }
 #else
@@ -539,7 +522,7 @@ static inline int pindex_to_order(unsigned int pindex)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (pindex == NR_LOWORDER_PCP_LISTS)
-               order = pageblock_order;
+               order = HPAGE_PMD_ORDER;
 #else
        VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
 #endif
@@ -552,20 +535,12 @@ static inline bool pcp_allowed_order(unsigned int order)
        if (order <= PAGE_ALLOC_COSTLY_ORDER)
                return true;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       if (order == pageblock_order)
+       if (order == HPAGE_PMD_ORDER)
                return true;
 #endif
        return false;
 }
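
The pindex helpers above map an (order, migratetype) pair onto a per-cpu list index, with everything past PAGE_ALLOC_COSTLY_ORDER sharing one extra list that is now keyed to HPAGE_PMD_ORDER rather than pageblock_order, since the two are not equal on every configuration. As a side note, a minimal userspace sketch of that mapping, assuming PAGE_ALLOC_COSTLY_ORDER = 3, MIGRATE_PCPTYPES = 3 and HPAGE_PMD_ORDER = 9 (the real values depend on the architecture and config):

    #include <assert.h>
    #include <stdio.h>

    /* Assumed values; the kernel derives these from the config/architecture. */
    #define PAGE_ALLOC_COSTLY_ORDER 3
    #define HPAGE_PMD_ORDER         9
    #define MIGRATE_PCPTYPES        3
    #define NR_LOWORDER_PCP_LISTS   (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1))

    /* Toy order_to_pindex(): orders 0..COSTLY get one list per migratetype,
     * the THP order shares a single list at the end. */
    static unsigned int order_to_pindex(int migratetype, int order)
    {
            if (order > PAGE_ALLOC_COSTLY_ORDER) {
                    assert(order == HPAGE_PMD_ORDER);
                    return NR_LOWORDER_PCP_LISTS;
            }
            return (MIGRATE_PCPTYPES * order) + migratetype;
    }

    int main(void)
    {
            printf("order 2, mt 1 -> pindex %u\n", order_to_pindex(1, 2));               /* 7 */
            printf("THP order     -> pindex %u\n", order_to_pindex(0, HPAGE_PMD_ORDER)); /* 12 */
            return 0;
    }
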
 
-static inline void free_the_page(struct page *page, unsigned int order)
-{
-       if (pcp_allowed_order(order))           /* Via pcp? */
-               free_unref_page(page, order);
-       else
-               __free_pages_ok(page, order, FPI_NONE);
-}
-
 /*
  * Higher-order pages are called "compound pages".  They are structured thusly:
  *
@@ -590,20 +565,6 @@ void prep_compound_page(struct page *page, unsigned int order)
        prep_compound_head(page, order);
 }
 
-void destroy_large_folio(struct folio *folio)
-{
-       if (folio_test_hugetlb(folio)) {
-               free_huge_folio(folio);
-               return;
-       }
-
-       if (folio_test_large_rmappable(folio))
-               folio_undo_large_rmappable(folio);
-
-       mem_cgroup_uncharge(folio);
-       free_the_page(&folio->page, folio_order(folio));
-}
-
 static inline void set_buddy_order(struct page *page, unsigned int order)
 {
        set_page_private(page, order);
@@ -634,12 +595,14 @@ compaction_capture(struct capture_control *capc, struct page *page,
                return false;
 
        /*
-        * Do not let lower order allocations pollute a movable pageblock.
+        * Do not let lower order allocations pollute a movable pageblock
+        * unless compaction is also requesting movable pages.
         * This might let an unmovable request use a reclaimable pageblock
         * and vice-versa but no more than normal fallback logic which can
         * have trouble finding a high-order free page.
         */
-       if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+       if (order < pageblock_order && migratetype == MIGRATE_MOVABLE &&
+           capc->cc->migratetype != MIGRATE_MOVABLE)
                return false;
 
        capc->page = page;
@@ -660,23 +623,33 @@ compaction_capture(struct capture_control *capc, struct page *page,
 }
 #endif /* CONFIG_COMPACTION */
 
-/* Used for pages not on another list */
-static inline void add_to_free_list(struct page *page, struct zone *zone,
-                                   unsigned int order, int migratetype)
+static inline void account_freepages(struct zone *zone, int nr_pages,
+                                    int migratetype)
 {
-       struct free_area *area = &zone->free_area[order];
+       if (is_migrate_isolate(migratetype))
+               return;
 
-       list_add(&page->buddy_list, &area->free_list[migratetype]);
-       area->nr_free++;
+       __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages);
+
+       if (is_migrate_cma(migratetype))
+               __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
 }
 
 /* Used for pages not on another list */
-static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
-                                        unsigned int order, int migratetype)
+static inline void __add_to_free_list(struct page *page, struct zone *zone,
+                                     unsigned int order, int migratetype,
+                                     bool tail)
 {
        struct free_area *area = &zone->free_area[order];
 
-       list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+       VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,
+                    "page type is %lu, passed migratetype is %d (nr=%d)\n",
+                    get_pageblock_migratetype(page), migratetype, 1 << order);
+
+       if (tail)
+               list_add_tail(&page->buddy_list, &area->free_list[migratetype]);
+       else
+               list_add(&page->buddy_list, &area->free_list[migratetype]);
        area->nr_free++;
 }
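
account_freepages(), added above, pulls the NR_FREE_PAGES / NR_FREE_CMA_PAGES updates that callers used to do by hand into one helper: isolated blocks are never counted as free, and CMA blocks are tracked in an additional counter on top of the global one. A standalone sketch of just that rule, with a made-up counter struct standing in for the zone vmstat fields:

    #include <stdio.h>

    enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE,
                       MIGRATE_CMA, MIGRATE_ISOLATE };

    /* Hypothetical stand-in for the zone's NR_FREE_PAGES/NR_FREE_CMA_PAGES. */
    struct zone_counters {
            long nr_free_pages;
            long nr_free_cma_pages;
    };

    static void account_freepages(struct zone_counters *z, int nr_pages, int mt)
    {
            if (mt == MIGRATE_ISOLATE)      /* isolated blocks stay invisible */
                    return;

            z->nr_free_pages += nr_pages;
            if (mt == MIGRATE_CMA)          /* CMA pages also tracked separately */
                    z->nr_free_cma_pages += nr_pages;
    }

    int main(void)
    {
            struct zone_counters z = { 0, 0 };

            account_freepages(&z, 1 << 3, MIGRATE_CMA);        /* free an order-3 CMA page */
            account_freepages(&z, -(1 << 0), MIGRATE_ISOLATE); /* no effect */
            printf("free=%ld cma=%ld\n", z.nr_free_pages, z.nr_free_cma_pages);
            return 0;
    }
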
 
@@ -686,16 +659,28 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
  * allocation again (e.g., optimization for memory onlining).
  */
 static inline void move_to_free_list(struct page *page, struct zone *zone,
-                                    unsigned int order, int migratetype)
+                                    unsigned int order, int old_mt, int new_mt)
 {
        struct free_area *area = &zone->free_area[order];
 
-       list_move_tail(&page->buddy_list, &area->free_list[migratetype]);
+       /* Free page moving can fail, so it happens before the type update */
+       VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt,
+                    "page type is %lu, passed migratetype is %d (nr=%d)\n",
+                    get_pageblock_migratetype(page), old_mt, 1 << order);
+
+       list_move_tail(&page->buddy_list, &area->free_list[new_mt]);
+
+       account_freepages(zone, -(1 << order), old_mt);
+       account_freepages(zone, 1 << order, new_mt);
 }
 
-static inline void del_page_from_free_list(struct page *page, struct zone *zone,
-                                          unsigned int order)
+static inline void __del_page_from_free_list(struct page *page, struct zone *zone,
+                                            unsigned int order, int migratetype)
 {
+       VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype,

+                    "page type is %lu, passed migratetype is %d (nr=%d)\n",
+                    get_pageblock_migratetype(page), migratetype, 1 << order);
+
        /* clear reported state and update reported page count */
        if (page_reported(page))
                __ClearPageReported(page);
@@ -706,6 +691,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
        zone->free_area[order].nr_free--;
 }
 
+static inline void del_page_from_free_list(struct page *page, struct zone *zone,
+                                          unsigned int order, int migratetype)
+{
+       __del_page_from_free_list(page, zone, order, migratetype);
+       account_freepages(zone, -(1 << order), migratetype);
+}
+
 static inline struct page *get_page_from_free_area(struct free_area *area,
                                            int migratetype)
 {
@@ -777,16 +769,16 @@ static inline void __free_one_page(struct page *page,
        VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page);
 
        VM_BUG_ON(migratetype == -1);
-       if (likely(!is_migrate_isolate(migratetype)))
-               __mod_zone_freepage_state(zone, 1 << order, migratetype);
-
        VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page);
        VM_BUG_ON_PAGE(bad_range(zone, page), page);
 
+       account_freepages(zone, 1 << order, migratetype);
+
        while (order < MAX_PAGE_ORDER) {
+               int buddy_mt = migratetype;
+
                if (compaction_capture(capc, page, order, migratetype)) {
-                       __mod_zone_freepage_state(zone, -(1 << order),
-                                                               migratetype);
+                       account_freepages(zone, -(1 << order), migratetype);
                        return;
                }
 
@@ -801,11 +793,11 @@ static inline void __free_one_page(struct page *page,
                         * pageblock isolation could cause incorrect freepage or CMA
                         * accounting or HIGHATOMIC accounting.
                         */
-                       int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
+                       buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn);
 
-                       if (migratetype != buddy_mt
-                                       && (!migratetype_is_mergeable(migratetype) ||
-                                               !migratetype_is_mergeable(buddy_mt)))
+                       if (migratetype != buddy_mt &&
+                           (!migratetype_is_mergeable(migratetype) ||
+                            !migratetype_is_mergeable(buddy_mt)))
                                goto done_merging;
                }
 
@@ -814,9 +806,19 @@ static inline void __free_one_page(struct page *page,
                 * merge with it and move up one order.
                 */
                if (page_is_guard(buddy))
-                       clear_page_guard(zone, buddy, order, migratetype);
+                       clear_page_guard(zone, buddy, order);
                else
-                       del_page_from_free_list(buddy, zone, order);
+                       __del_page_from_free_list(buddy, zone, order, buddy_mt);
+
+               if (unlikely(buddy_mt != migratetype)) {
+                       /*
+                        * Match buddy type. This ensures that an
+                        * expand() down the line puts the sub-blocks
+                        * on the right freelists.
+                        */
+                       set_pageblock_migratetype(buddy, migratetype);
+               }
+
                combined_pfn = buddy_pfn & pfn;
                page = page + (combined_pfn - pfn);
                pfn = combined_pfn;
@@ -833,74 +835,13 @@ done_merging:
        else
                to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
 
-       if (to_tail)
-               add_to_free_list_tail(page, zone, order, migratetype);
-       else
-               add_to_free_list(page, zone, order, migratetype);
+       __add_to_free_list(page, zone, order, migratetype, to_tail);
 
        /* Notify page reporting subsystem of freed page */
        if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY))
                page_reporting_notify_free(order);
 }
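
For reference, the merge loop above leans on the standard buddy arithmetic: the buddy of an order-aligned pfn differs from it only in bit 'order', and the merged pair starts at the lower of the two pfns, which is what combined_pfn = buddy_pfn & pfn computes. A tiny userspace illustration of that arithmetic (the kernel's __find_buddy_pfn() is the same XOR):

    #include <stdio.h>

    /* Buddy of an order-aligned pfn: flip bit 'order'. */
    static unsigned long find_buddy_pfn(unsigned long pfn, unsigned int order)
    {
            return pfn ^ (1UL << order);
    }

    int main(void)
    {
            unsigned long pfn = 0x1234;             /* order-2 aligned */
            unsigned int order = 2;
            unsigned long buddy = find_buddy_pfn(pfn, order);
            unsigned long combined = pfn & buddy;   /* start of the merged order-3 page */

            printf("pfn=%#lx buddy=%#lx merged=%#lx\n", pfn, buddy, combined);
            return 0;
    }
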
 
-/**
- * split_free_page() -- split a free page at split_pfn_offset
- * @free_page:         the original free page
- * @order:             the order of the page
- * @split_pfn_offset:  split offset within the page
- *
- * Return -ENOENT if the free page is changed, otherwise 0
- *
- * It is used when the free page crosses two pageblocks with different migratetypes
- * at split_pfn_offset within the page. The split free page will be put into
- * separate migratetype lists afterwards. Otherwise, the function achieves
- * nothing.
- */
-int split_free_page(struct page *free_page,
-                       unsigned int order, unsigned long split_pfn_offset)
-{
-       struct zone *zone = page_zone(free_page);
-       unsigned long free_page_pfn = page_to_pfn(free_page);
-       unsigned long pfn;
-       unsigned long flags;
-       int free_page_order;
-       int mt;
-       int ret = 0;
-
-       if (split_pfn_offset == 0)
-               return ret;
-
-       spin_lock_irqsave(&zone->lock, flags);
-
-       if (!PageBuddy(free_page) || buddy_order(free_page) != order) {
-               ret = -ENOENT;
-               goto out;
-       }
-
-       mt = get_pfnblock_migratetype(free_page, free_page_pfn);
-       if (likely(!is_migrate_isolate(mt)))
-               __mod_zone_freepage_state(zone, -(1UL << order), mt);
-
-       del_page_from_free_list(free_page, zone, order);
-       for (pfn = free_page_pfn;
-            pfn < free_page_pfn + (1UL << order);) {
-               int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
-
-               free_page_order = min_t(unsigned int,
-                                       pfn ? __ffs(pfn) : order,
-                                       __fls(split_pfn_offset));
-               __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order,
-                               mt, FPI_NONE);
-               pfn += 1UL << free_page_order;
-               split_pfn_offset -= (1UL << free_page_order);
-               /* we have done the first part, now switch to second part */
-               if (split_pfn_offset == 0)
-                       split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
-       }
-out:
-       spin_unlock_irqrestore(&zone->lock, flags);
-       return ret;
-}
 /*
  * A bad page could be due to a number of fields. Instead of multiple branches,
  * try and check multiple fields with one check. The caller must do a detailed
@@ -996,6 +937,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
                        bad_page(page, "nonzero entire_mapcount");
                        goto out;
                }
+               if (unlikely(folio_large_mapcount(folio))) {
+                       bad_page(page, "nonzero large_mapcount");
+                       goto out;
+               }
                if (unlikely(atomic_read(&folio->_nr_pages_mapped))) {
                        bad_page(page, "nonzero nr_pages_mapped");
                        goto out;
@@ -1006,10 +951,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page)
                }
                break;
        case 2:
-               /*
-                * the second tail page: ->mapping is
-                * deferred_list.next -- ignore value.
-                */
+               /* the second tail page: deferred_list overlaps ->mapping */
+               if (unlikely(!list_empty(&folio->_deferred_list))) {
+                       bad_page(page, "on deferred list");
+                       goto out;
+               }
                break;
        default:
                if (page->mapping != TAIL_MAPPING) {
@@ -1070,7 +1016,7 @@ static inline bool should_skip_kasan_poison(struct page *page)
        return page_kasan_tag(page) == KASAN_TAG_KERNEL;
 }
 
-static void kernel_init_pages(struct page *page, int numpages)
+void kernel_init_pages(struct page *page, int numpages)
 {
        int i;
 
@@ -1101,6 +1047,7 @@ __always_inline bool free_pages_prepare(struct page *page,
                /* Do not let hwpoison pages hit pcplists/buddy */
                reset_page_owner(page, order);
                page_table_check_free(page, order);
+               pgalloc_tag_sub(page, 1 << order);
                return false;
        }
 
@@ -1140,6 +1087,7 @@ __always_inline bool free_pages_prepare(struct page *page,
        page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
        reset_page_owner(page, order);
        page_table_check_free(page, order);
+       pgalloc_tag_sub(page, 1 << order);
 
        if (!PageHighMem(page)) {
                debug_check_no_locks_freed(page_address(page),
@@ -1191,7 +1139,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 {
        unsigned long flags;
        unsigned int order;
-       bool isolated_pageblocks;
        struct page *page;
 
        /*
@@ -1204,7 +1151,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        pindex = pindex - 1;
 
        spin_lock_irqsave(&zone->lock, flags);
-       isolated_pageblocks = has_isolate_pageblock(zone);
 
        while (count > 0) {
                struct list_head *list;
@@ -1220,23 +1166,19 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                order = pindex_to_order(pindex);
                nr_pages = 1 << order;
                do {
+                       unsigned long pfn;
                        int mt;
 
                        page = list_last_entry(list, struct page, pcp_list);
-                       mt = get_pcppage_migratetype(page);
+                       pfn = page_to_pfn(page);
+                       mt = get_pfnblock_migratetype(page, pfn);
 
                        /* must delete to avoid corrupting pcp list */
                        list_del(&page->pcp_list);
                        count -= nr_pages;
                        pcp->count -= nr_pages;
 
-                       /* MIGRATE_ISOLATE page should not go to pcplists */
-                       VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
-                       /* Pageblock could have been isolated meanwhile */
-                       if (unlikely(isolated_pageblocks))
-                               mt = get_pageblock_migratetype(page);
-
-                       __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
+                       __free_one_page(page, pfn, zone, order, mt, FPI_NONE);
                        trace_mm_page_pcpu_drain(page, order, mt);
                } while (count > 0 && !list_empty(list));
        }
@@ -1244,18 +1186,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        spin_unlock_irqrestore(&zone->lock, flags);
 }
 
-static void free_one_page(struct zone *zone,
-                               struct page *page, unsigned long pfn,
-                               unsigned int order,
-                               int migratetype, fpi_t fpi_flags)
+static void free_one_page(struct zone *zone, struct page *page,
+                         unsigned long pfn, unsigned int order,
+                         fpi_t fpi_flags)
 {
        unsigned long flags;
+       int migratetype;
 
        spin_lock_irqsave(&zone->lock, flags);
-       if (unlikely(has_isolate_pageblock(zone) ||
-               is_migrate_isolate(migratetype))) {
-               migratetype = get_pfnblock_migratetype(page, pfn);
-       }
+       migratetype = get_pfnblock_migratetype(page, pfn);
        __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
        spin_unlock_irqrestore(&zone->lock, flags);
 }
@@ -1263,21 +1202,13 @@ static void free_one_page(struct zone *zone,
 static void __free_pages_ok(struct page *page, unsigned int order,
                            fpi_t fpi_flags)
 {
-       int migratetype;
        unsigned long pfn = page_to_pfn(page);
        struct zone *zone = page_zone(page);
 
        if (!free_pages_prepare(page, order))
                return;
 
-       /*
-        * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
-        * is used to avoid calling get_pfnblock_migratetype() under the lock.
-        * This will reduce the lock holding time.
-        */
-       migratetype = get_pfnblock_migratetype(page, pfn);
-
-       free_one_page(zone, page, pfn, order, migratetype, fpi_flags);
+       free_one_page(zone, page, pfn, order, fpi_flags);
 
        __count_vm_events(PGFREE, 1 << order);
 }
@@ -1388,6 +1319,7 @@ static inline void expand(struct zone *zone, struct page *page,
        int low, int high, int migratetype)
 {
        unsigned long size = 1 << high;
+       unsigned long nr_added = 0;
 
        while (high > low) {
                high--;
@@ -1400,12 +1332,14 @@ static inline void expand(struct zone *zone, struct page *page,
                 * Corresponding page table entries will not be touched,
                 * pages will stay not present in virtual address space
                 */
-               if (set_page_guard(zone, &page[size], high, migratetype))
+               if (set_page_guard(zone, &page[size], high))
                        continue;
 
-               add_to_free_list(&page[size], zone, high, migratetype);
+               __add_to_free_list(&page[size], zone, high, migratetype, false);
                set_buddy_order(&page[size], high);
+               nr_added += size;
        }
+       account_freepages(zone, nr_added, migratetype);
 }
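
expand() returns the unused part of a high-order block by halving it repeatedly and putting the upper half of each split on the freelist of the next lower order; with this change the freepage accounting for all of those halves is folded into a single account_freepages() call via nr_added. A toy walk of the split sizes, assuming an order-5 block was found for an order-1 request:

    #include <stdio.h>

    int main(void)
    {
            unsigned int low = 1, high = 5;         /* requested vs. found order */
            unsigned long size = 1UL << high;
            unsigned long nr_added = 0;

            while (high > low) {
                    high--;
                    size >>= 1;
                    /* &page[size] would be added to free_area[high] here */
                    printf("free half at offset %lu, order %u\n", size, high);
                    nr_added += size;
            }
            printf("returned to freelists: %lu pages, kept: %lu\n",
                   nr_added, 1UL << low);
            return 0;
    }
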
 
 static void check_new_page_bad(struct page *page)
@@ -1533,6 +1467,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
        set_page_owner(page, order, gfp_flags);
        page_table_check_alloc(page, order);
+       pgalloc_tag_add(page, current, 1 << order);
 }
 
 static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
@@ -1573,9 +1508,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                page = get_page_from_free_area(area, migratetype);
                if (!page)
                        continue;
-               del_page_from_free_list(page, zone, current_order);
+               del_page_from_free_list(page, zone, current_order, migratetype);
                expand(zone, page, order, current_order, migratetype);
-               set_pcppage_migratetype(page, migratetype);
                trace_mm_page_alloc_zone_locked(page, order, migratetype,
                                pcp_allowed_order(order) &&
                                migratetype < MIGRATE_PCPTYPES);
@@ -1592,7 +1526,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
  *
  * The other migratetypes do not have fallbacks.
  */
-static int fallbacks[MIGRATE_TYPES][MIGRATE_PCPTYPES - 1] = {
+static int fallbacks[MIGRATE_PCPTYPES][MIGRATE_PCPTYPES - 1] = {
        [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE   },
        [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
        [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE   },
@@ -1610,30 +1544,23 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
 #endif
 
 /*
- * Move the free pages in a range to the freelist tail of the requested type.
- * Note that start_page and end_pages are not aligned on a pageblock
- * boundary. If alignment is required, use move_freepages_block()
+ * Change the type of a block and move all its free pages to that
+ * type's freelist.
  */
-static int move_freepages(struct zone *zone,
-                         unsigned long start_pfn, unsigned long end_pfn,
-                         int migratetype, int *num_movable)
+static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
+                                 int old_mt, int new_mt)
 {
        struct page *page;
-       unsigned long pfn;
+       unsigned long pfn, end_pfn;
        unsigned int order;
        int pages_moved = 0;
 
-       for (pfn = start_pfn; pfn <= end_pfn;) {
+       VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1));
+       end_pfn = pageblock_end_pfn(start_pfn);
+
+       for (pfn = start_pfn; pfn < end_pfn;) {
                page = pfn_to_page(pfn);
                if (!PageBuddy(page)) {
-                       /*
-                        * We assume that pages that could be isolated for
-                        * migration are movable. But we don't actually try
-                        * isolating, as that would be expensive.
-                        */
-                       if (num_movable &&
-                                       (PageLRU(page) || __PageMovable(page)))
-                               (*num_movable)++;
                        pfn++;
                        continue;
                }
@@ -1643,35 +1570,186 @@ static int move_freepages(struct zone *zone,
                VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
                order = buddy_order(page);
-               move_to_free_list(page, zone, order, migratetype);
+
+               move_to_free_list(page, zone, order, old_mt, new_mt);
+
                pfn += 1 << order;
                pages_moved += 1 << order;
        }
 
+       set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+
        return pages_moved;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page,
-                               int migratetype, int *num_movable)
+static bool prep_move_freepages_block(struct zone *zone, struct page *page,
+                                     unsigned long *start_pfn,
+                                     int *num_free, int *num_movable)
 {
-       unsigned long start_pfn, end_pfn, pfn;
+       unsigned long pfn, start, end;
 
-       if (num_movable)
+       pfn = page_to_pfn(page);
+       start = pageblock_start_pfn(pfn);
+       end = pageblock_end_pfn(pfn);
+
+       /*
+        * The caller only has the lock for @zone, don't touch ranges
+        * that straddle into other zones. While we could move part of
+        * the range that's inside the zone, this call is usually
+        * accompanied by other operations such as migratetype updates
+        * which also should be locked.
+        */
+       if (!zone_spans_pfn(zone, start))
+               return false;
+       if (!zone_spans_pfn(zone, end - 1))
+               return false;
+
+       *start_pfn = start;
+
+       if (num_free) {
+               *num_free = 0;
                *num_movable = 0;
+               for (pfn = start; pfn < end;) {
+                       page = pfn_to_page(pfn);
+                       if (PageBuddy(page)) {
+                               int nr = 1 << buddy_order(page);
 
-       pfn = page_to_pfn(page);
-       start_pfn = pageblock_start_pfn(pfn);
-       end_pfn = pageblock_end_pfn(pfn) - 1;
+                               *num_free += nr;
+                               pfn += nr;
+                               continue;
+                       }
+                       /*
+                        * We assume that pages that could be isolated for
+                        * migration are movable. But we don't actually try
+                        * isolating, as that would be expensive.
+                        */
+                       if (PageLRU(page) || __PageMovable(page))
+                               (*num_movable)++;
+                       pfn++;
+               }
+       }
 
-       /* Do not cross zone boundaries */
-       if (!zone_spans_pfn(zone, start_pfn))
-               start_pfn = pfn;
-       if (!zone_spans_pfn(zone, end_pfn))
-               return 0;
+       return true;
+}
+
+static int move_freepages_block(struct zone *zone, struct page *page,
+                               int old_mt, int new_mt)
+{
+       unsigned long start_pfn;
+
+       if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
+               return -1;
+
+       return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
+}
+
+#ifdef CONFIG_MEMORY_ISOLATION
+/* Look for a buddy that straddles start_pfn */
+static unsigned long find_large_buddy(unsigned long start_pfn)
+{
+       int order = 0;
+       struct page *page;
+       unsigned long pfn = start_pfn;
+
+       while (!PageBuddy(page = pfn_to_page(pfn))) {
+               /* Nothing found */
+               if (++order > MAX_PAGE_ORDER)
+                       return start_pfn;
+               pfn &= ~0UL << order;
+       }
+
+       /*
+        * Found a preceding buddy, but does it straddle?
+        */
+       if (pfn + (1 << buddy_order(page)) > start_pfn)
+               return pfn;
+
+       /* Nothing found */
+       return start_pfn;
+}
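
find_large_buddy() climbs the orders by aligning the pfn further down each step (pfn &= ~0UL << order) until it lands on a free buddy or runs out of orders, then checks whether that buddy actually extends past start_pfn. A self-contained sketch of the same walk, with one hard-coded free buddy standing in for PageBuddy()/buddy_order() and MAX_PAGE_ORDER assumed to be 10:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_PAGE_ORDER 10               /* assumed */

    /* Toy model: a single free buddy of order 4 starting at pfn 0x100. */
    static bool pfn_is_buddy(unsigned long pfn, unsigned int *order)
    {
            if (pfn == 0x100) {
                    *order = 4;
                    return true;
            }
            return false;
    }

    static unsigned long find_large_buddy(unsigned long start_pfn)
    {
            unsigned int order = 0, bo;
            unsigned long pfn = start_pfn;

            while (!pfn_is_buddy(pfn, &bo)) {
                    if (++order > MAX_PAGE_ORDER)
                            return start_pfn;       /* nothing found */
                    pfn &= ~0UL << order;           /* align down to this order */
            }
            if (pfn + (1UL << bo) > start_pfn)
                    return pfn;                     /* buddy straddles start_pfn */
            return start_pfn;
    }

    int main(void)
    {
            printf("%#lx\n", find_large_buddy(0x108));  /* inside 0x100..0x10f -> 0x100 */
            printf("%#lx\n", find_large_buddy(0x140));  /* not covered        -> 0x140 */
            return 0;
    }
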
+
+/* Split a multi-block free page into its individual pageblocks */
+static void split_large_buddy(struct zone *zone, struct page *page,
+                             unsigned long pfn, int order)
+{
+       unsigned long end_pfn = pfn + (1 << order);
+
+       VM_WARN_ON_ONCE(order <= pageblock_order);
+       VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
+
+       /* Caller removed page from freelist, buddy info cleared! */
+       VM_WARN_ON_ONCE(PageBuddy(page));
+
+       while (pfn != end_pfn) {
+               int mt = get_pfnblock_migratetype(page, pfn);
+
+               __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
+               pfn += pageblock_nr_pages;
+               page = pfn_to_page(pfn);
+       }
+}
+
+/**
+ * move_freepages_block_isolate - move free pages in block for page isolation
+ * @zone: the zone
+ * @page: the pageblock page
+ * @migratetype: migratetype to set on the pageblock
+ *
+ * This is similar to move_freepages_block(), but handles the special
+ * case encountered in page isolation, where the block of interest
+ * might be part of a larger buddy spanning multiple pageblocks.
+ *
+ * Unlike the regular page allocator path, which moves pages while
+ * stealing buddies off the freelist, page isolation is interested in
+ * arbitrary pfn ranges that may have overlapping buddies on both ends.
+ *
+ * This function handles that. Straddling buddies are split into
+ * individual pageblocks. Only the block of interest is moved.
+ *
+ * Returns %true if pages could be moved, %false otherwise.
+ */
+bool move_freepages_block_isolate(struct zone *zone, struct page *page,
+                                 int migratetype)
+{
+       unsigned long start_pfn, pfn;
+
+       if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
+               return false;
+
+       /* No splits needed if buddies can't span multiple blocks */
+       if (pageblock_order == MAX_PAGE_ORDER)
+               goto move;
+
+       /* We're a tail block in a larger buddy */
+       pfn = find_large_buddy(start_pfn);
+       if (pfn != start_pfn) {
+               struct page *buddy = pfn_to_page(pfn);
+               int order = buddy_order(buddy);
 
-       return move_freepages(zone, start_pfn, end_pfn, migratetype,
-                                                               num_movable);
+               del_page_from_free_list(buddy, zone, order,
+                                       get_pfnblock_migratetype(buddy, pfn));
+               set_pageblock_migratetype(page, migratetype);
+               split_large_buddy(zone, buddy, pfn, order);
+               return true;
+       }
+
+       /* We're the starting block of a larger buddy */
+       if (PageBuddy(page) && buddy_order(page) > pageblock_order) {
+               int order = buddy_order(page);
+
+               del_page_from_free_list(page, zone, order,
+                                       get_pfnblock_migratetype(page, pfn));
+               set_pageblock_migratetype(page, migratetype);
+               split_large_buddy(zone, page, pfn, order);
+               return true;
+       }
+move:
+       __move_freepages_block(zone, start_pfn,
+                              get_pfnblock_migratetype(page, start_pfn),
+                              migratetype);
+       return true;
 }
+#endif /* CONFIG_MEMORY_ISOLATION */
 
 static void change_pageblock_range(struct page *pageblock_page,
                                        int start_order, int migratetype)
@@ -1755,33 +1833,37 @@ static inline bool boost_watermark(struct zone *zone)
 }
 
 /*
- * This function implements actual steal behaviour. If order is large enough,
- * we can steal whole pageblock. If not, we first move freepages in this
- * pageblock to our migratetype and determine how many already-allocated pages
- * are there in the pageblock with a compatible migratetype. If at least half
- * of pages are free or compatible, we can change migratetype of the pageblock
- * itself, so pages freed in the future will be put on the correct free list.
+ * This function implements actual steal behaviour. If order is large enough, we
+ * can claim the whole pageblock for the requested migratetype. If not, we check
+ * the pageblock for constituent pages; if at least half of the pages are free
+ * or compatible, we can still claim the whole block, so pages freed in the
+ * future will be put on the correct free list. Otherwise, we isolate exactly
+ * the order we need from the fallback block and leave its migratetype alone.
  */
-static void steal_suitable_fallback(struct zone *zone, struct page *page,
-               unsigned int alloc_flags, int start_type, bool whole_block)
+static struct page *
+steal_suitable_fallback(struct zone *zone, struct page *page,
+                       int current_order, int order, int start_type,
+                       unsigned int alloc_flags, bool whole_block)
 {
-       unsigned int current_order = buddy_order(page);
        int free_pages, movable_pages, alike_pages;
-       int old_block_type;
+       unsigned long start_pfn;
+       int block_type;
 
-       old_block_type = get_pageblock_migratetype(page);
+       block_type = get_pageblock_migratetype(page);
 
        /*
         * This can happen due to races and we want to prevent broken
         * highatomic accounting.
         */
-       if (is_migrate_highatomic(old_block_type))
+       if (is_migrate_highatomic(block_type))
                goto single_page;
 
        /* Take ownership for orders >= pageblock_order */
        if (current_order >= pageblock_order) {
+               del_page_from_free_list(page, zone, current_order, block_type);
                change_pageblock_range(page, current_order, start_type);
-               goto single_page;
+               expand(zone, page, order, current_order, start_type);
+               return page;
        }
 
        /*
@@ -1796,10 +1878,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
        if (!whole_block)
                goto single_page;
 
-       free_pages = move_freepages_block(zone, page, start_type,
-                                               &movable_pages);
        /* moving whole block can fail due to zone boundary conditions */
-       if (!free_pages)
+       if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages,
+                                      &movable_pages))
                goto single_page;
 
        /*
@@ -1817,7 +1898,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
                 * vice versa, be conservative since we can't distinguish the
                 * exact migratetype of non-movable pages.
                 */
-               if (old_block_type == MIGRATE_MOVABLE)
+               if (block_type == MIGRATE_MOVABLE)
                        alike_pages = pageblock_nr_pages
                                                - (free_pages + movable_pages);
                else
@@ -1828,13 +1909,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page,
         * compatible migratability as our allocation, claim the whole block.
         */
        if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
-                       page_group_by_mobility_disabled)
-               set_pageblock_migratetype(page, start_type);
-
-       return;
+                       page_group_by_mobility_disabled) {
+               __move_freepages_block(zone, start_pfn, block_type, start_type);
+               return __rmqueue_smallest(zone, order, start_type);
+       }
 
 single_page:
-       move_to_free_list(page, zone, current_order, start_type);
+       del_page_from_free_list(page, zone, current_order, block_type);
+       expand(zone, page, order, current_order, block_type);
+       return page;
 }
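
The whole-block claim in steal_suitable_fallback() comes down to one threshold: if the block's free pages plus its already-allocated pages of compatible mobility cover at least half a pageblock (or grouping by mobility is disabled), the block is converted and the allocation retried from the target freelist; otherwise only the requested order is taken and the block type is left untouched. A toy version of that predicate, assuming 4K pages and pageblock_order = 9:

    #include <stdbool.h>
    #include <stdio.h>

    #define pageblock_order    9
    #define pageblock_nr_pages (1L << pageblock_order)      /* 512, assumed 4K pages */

    static bool should_claim_block(long free_pages, long alike_pages,
                                   bool group_by_mobility_disabled)
    {
            return free_pages + alike_pages >= pageblock_nr_pages / 2 ||
                   group_by_mobility_disabled;
    }

    int main(void)
    {
            printf("%d\n", should_claim_block(200, 100, false));    /* 300 >= 256 -> 1 */
            printf("%d\n", should_claim_block(100, 100, false));    /* 200 <  256 -> 0 */
            return 0;
    }
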
 
 /*
@@ -1901,11 +1984,10 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
        /* Yoink! */
        mt = get_pageblock_migratetype(page);
        /* Only reserve normal pageblocks (i.e., they can merge with others) */
-       if (migratetype_is_mergeable(mt)) {
-               zone->nr_reserved_highatomic += pageblock_nr_pages;
-               set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC);
-               move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL);
-       }
+       if (migratetype_is_mergeable(mt))
+               if (move_freepages_block(zone, page, mt,
+                                        MIGRATE_HIGHATOMIC) != -1)
+                       zone->nr_reserved_highatomic += pageblock_nr_pages;
 
 out_unlock:
        spin_unlock_irqrestore(&zone->lock, flags);
@@ -1929,7 +2011,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
        struct zone *zone;
        struct page *page;
        int order;
-       bool ret;
+       int ret;
 
        for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
                                                                ac->nodemask) {
@@ -1944,11 +2026,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                spin_lock_irqsave(&zone->lock, flags);
                for (order = 0; order < NR_PAGE_ORDERS; order++) {
                        struct free_area *area = &(zone->free_area[order]);
+                       int mt;
 
                        page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
                        if (!page)
                                continue;
 
+                       mt = get_pageblock_migratetype(page);
                        /*
                         * In page freeing path, migratetype change is racy so
                         * we can counter several free pages in a pageblock
@@ -1956,7 +2040,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                         * from highatomic to ac->migratetype. So we should
                         * adjust the count once.
                         */
-                       if (is_migrate_highatomic_page(page)) {
+                       if (is_migrate_highatomic(mt)) {
                                /*
                                 * It should never happen but changes to
                                 * locking could inadvertently allow a per-cpu
@@ -1978,10 +2062,14 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
                         * of pageblocks that cannot be completely freed
                         * may increase.
                         */
-                       set_pageblock_migratetype(page, ac->migratetype);
-                       ret = move_freepages_block(zone, page, ac->migratetype,
-                                                                       NULL);
-                       if (ret) {
+                       ret = move_freepages_block(zone, page, mt,
+                                                  ac->migratetype);
+                       /*
+                        * Reserving this block already succeeded, so this should
+                        * not fail on zone boundaries.
+                        */
+                       WARN_ON_ONCE(ret == -1);
+                       if (ret > 0) {
                                spin_unlock_irqrestore(&zone->lock, flags);
                                return ret;
                        }
@@ -2002,7 +2090,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * deviation from the rest of this file, to make the for loop
  * condition simpler.
  */
-static __always_inline bool
+static __always_inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
                                                unsigned int alloc_flags)
 {
@@ -2049,7 +2137,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
                goto do_steal;
        }
 
-       return false;
+       return NULL;
 
 find_smallest:
        for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
@@ -2069,14 +2157,14 @@ find_smallest:
 do_steal:
        page = get_page_from_free_area(area, fallback_mt);
 
-       steal_suitable_fallback(zone, page, alloc_flags, start_migratetype,
-                                                               can_steal);
+       /* take off list, maybe claim block, expand remainder */
+       page = steal_suitable_fallback(zone, page, current_order, order,
+                                      start_migratetype, alloc_flags, can_steal);
 
        trace_mm_page_alloc_extfrag(page, order, current_order,
                start_migratetype, fallback_mt);
 
-       return true;
-
+       return page;
 }
 
 /*
@@ -2103,15 +2191,15 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
                                return page;
                }
        }
-retry:
+
        page = __rmqueue_smallest(zone, order, migratetype);
        if (unlikely(!page)) {
                if (alloc_flags & ALLOC_CMA)
                        page = __rmqueue_cma_fallback(zone, order);
 
-               if (!page && __rmqueue_fallback(zone, order, migratetype,
-                                                               alloc_flags))
-                       goto retry;
+               if (!page)
+                       page = __rmqueue_fallback(zone, order, migratetype,
+                                                 alloc_flags);
        }
        return page;
 }
@@ -2146,12 +2234,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                 * pages are ordered properly.
                 */
                list_add_tail(&page->pcp_list, list);
-               if (is_migrate_cma(get_pcppage_migratetype(page)))
-                       __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
-                                             -(1 << order));
        }
-
-       __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
        spin_unlock_irqrestore(&zone->lock, flags);
 
        return i;
@@ -2216,12 +2299,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
  */
 static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 {
-       struct per_cpu_pages *pcp;
+       struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+       int count = READ_ONCE(pcp->count);
+
+       while (count) {
+               int to_drain = min(count, pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+               count -= to_drain;
 
-       pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
-       if (pcp->count) {
                spin_lock(&pcp->lock);
-               free_pcppages_bulk(zone, pcp->count, pcp, 0);
+               free_pcppages_bulk(zone, to_drain, pcp, 0);
                spin_unlock(&pcp->lock);
        }
 }
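
drain_pages_zone() now frees the pcplist in chunks capped at pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX, dropping and re-taking the pcp lock between chunks rather than freeing everything under a single lock hold. A userspace sketch of the chunking, with assumed values for the batch size and the scale (the Kconfig default for the scale is 5):

    #include <stdio.h>

    #define CONFIG_PCP_BATCH_SCALE_MAX 5    /* assumed Kconfig default */

    static int min_int(int a, int b) { return a < b ? a : b; }

    int main(void)
    {
            int count = 10000;              /* pages currently on the pcplist */
            int batch = 63;                 /* assumed pcp->batch */
            int cap = batch << CONFIG_PCP_BATCH_SCALE_MAX;

            while (count) {
                    int to_drain = min_int(count, cap);

                    /* lock pcp; free_pcppages_bulk(zone, to_drain, pcp, 0); unlock */
                    printf("drain %d, %d left\n", to_drain, count - to_drain);
                    count -= to_drain;
            }
            return 0;
    }
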
@@ -2339,19 +2425,6 @@ void drain_all_pages(struct zone *zone)
        __drain_all_pages(zone, false);
 }
 
-static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
-                                                       unsigned int order)
-{
-       int migratetype;
-
-       if (!free_pages_prepare(page, order))
-               return false;
-
-       migratetype = get_pfnblock_migratetype(page, pfn);
-       set_pcppage_migratetype(page, migratetype);
-       return true;
-}
-
 static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high)
 {
        int min_nr_free, max_nr_free;
@@ -2482,9 +2555,14 @@ void free_unref_page(struct page *page, unsigned int order)
        struct per_cpu_pages *pcp;
        struct zone *zone;
        unsigned long pfn = page_to_pfn(page);
-       int migratetype, pcpmigratetype;
+       int migratetype;
 
-       if (!free_unref_page_prepare(page, pfn, order))
+       if (!pcp_allowed_order(order)) {
+               __free_pages_ok(page, order, FPI_NONE);
+               return;
+       }
+
+       if (!free_pages_prepare(page, order))
                return;
 
        /*
@@ -2494,23 +2572,23 @@ void free_unref_page(struct page *page, unsigned int order)
         * get those areas back if necessary. Otherwise, we may have to free
         * excessively into the page allocator
         */
-       migratetype = pcpmigratetype = get_pcppage_migratetype(page);
+       migratetype = get_pfnblock_migratetype(page, pfn);
        if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
                if (unlikely(is_migrate_isolate(migratetype))) {
-                       free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
+                       free_one_page(page_zone(page), page, pfn, order, FPI_NONE);
                        return;
                }
-               pcpmigratetype = MIGRATE_MOVABLE;
+               migratetype = MIGRATE_MOVABLE;
        }
 
        zone = page_zone(page);
        pcp_trylock_prepare(UP_flags);
        pcp = pcp_spin_trylock(zone->per_cpu_pageset);
        if (pcp) {
-               free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+               free_unref_page_commit(zone, pcp, page, migratetype, order);
                pcp_spin_unlock(pcp);
        } else {
-               free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
+               free_one_page(zone, page, pfn, order, FPI_NONE);
        }
        pcp_trylock_finish(UP_flags);
 }
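
With the pcppage migratetype cache removed, free_unref_page() looks up the block type from the pageblock bitmap at free time: isolated pages bypass the pcplist and go straight to the buddy allocator, while the other non-PCP types (CMA, HIGHATOMIC) are parked on the MOVABLE pcplist. A toy routing helper showing that decision, with the migratetype values laid out as in the usual kernel enum:

    #include <stdio.h>

    enum {
            MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE,
            MIGRATE_PCPTYPES,                   /* types below this have pcplists */
            MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
            MIGRATE_CMA, MIGRATE_ISOLATE,
    };

    /* Return the pcplist type to use, or -1 to free directly to the buddy. */
    static int pcplist_migratetype(int block_mt)
    {
            if (block_mt >= MIGRATE_PCPTYPES) {
                    if (block_mt == MIGRATE_ISOLATE)
                            return -1;          /* free_one_page() path */
                    return MIGRATE_MOVABLE;     /* CMA/HIGHATOMIC share MOVABLE */
            }
            return block_mt;
    }

    int main(void)
    {
            printf("%d %d %d\n",
                   pcplist_migratetype(MIGRATE_RECLAIMABLE),    /*  2 */
                   pcplist_migratetype(MIGRATE_CMA),            /*  1 */
                   pcplist_migratetype(MIGRATE_ISOLATE));       /* -1 */
            return 0;
    }
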
@@ -2523,7 +2601,7 @@ void free_unref_folios(struct folio_batch *folios)
        unsigned long __maybe_unused UP_flags;
        struct per_cpu_pages *pcp = NULL;
        struct zone *locked_zone = NULL;
-       int i, j, migratetype;
+       int i, j;
 
        /* Prepare folios for freeing */
        for (i = 0, j = 0; i < folios->nr; i++) {
@@ -2533,18 +2611,15 @@ void free_unref_folios(struct folio_batch *folios)
 
                if (order > 0 && folio_test_large_rmappable(folio))
                        folio_undo_large_rmappable(folio);
-               if (!free_unref_page_prepare(&folio->page, pfn, order))
+               if (!free_pages_prepare(&folio->page, order))
                        continue;
-
                /*
-                * Free isolated folios and orders not handled on the PCP
-                * directly to the allocator, see comment in free_unref_page.
+                * Free orders not handled on the PCP directly to the
+                * allocator.
                 */
-               migratetype = get_pcppage_migratetype(&folio->page);
-               if (!pcp_allowed_order(order) ||
-                   is_migrate_isolate(migratetype)) {
-                       free_one_page(folio_zone(folio), &folio->page, pfn,
-                                       order, migratetype, FPI_NONE);
+               if (!pcp_allowed_order(order)) {
+                       free_one_page(folio_zone(folio), &folio->page,
+                                     pfn, order, FPI_NONE);
                        continue;
                }
                folio->private = (void *)(unsigned long)order;
@@ -2557,16 +2632,31 @@ void free_unref_folios(struct folio_batch *folios)
        for (i = 0; i < folios->nr; i++) {
                struct folio *folio = folios->folios[i];
                struct zone *zone = folio_zone(folio);
+               unsigned long pfn = folio_pfn(folio);
                unsigned int order = (unsigned long)folio->private;
+               int migratetype;
 
                folio->private = NULL;
-               migratetype = get_pcppage_migratetype(&folio->page);
+               migratetype = get_pfnblock_migratetype(&folio->page, pfn);
 
                /* Different zone requires a different pcp lock */
-               if (zone != locked_zone) {
+               if (zone != locked_zone ||
+                   is_migrate_isolate(migratetype)) {
                        if (pcp) {
                                pcp_spin_unlock(pcp);
                                pcp_trylock_finish(UP_flags);
+                               locked_zone = NULL;
+                               pcp = NULL;
+                       }
+
+                       /*
+                        * Free isolated pages directly to the
+                        * allocator, see comment in free_unref_page.
+                        */
+                       if (is_migrate_isolate(migratetype)) {
+                               free_one_page(zone, &folio->page, pfn,
+                                             order, FPI_NONE);
+                               continue;
                        }
 
                        /*
@@ -2577,10 +2667,8 @@ void free_unref_folios(struct folio_batch *folios)
                        pcp = pcp_spin_trylock(zone->per_cpu_pageset);
                        if (unlikely(!pcp)) {
                                pcp_trylock_finish(UP_flags);
-                               free_one_page(zone, &folio->page,
-                                               folio_pfn(folio), order,
-                                               migratetype, FPI_NONE);
-                               locked_zone = NULL;
+                               free_one_page(zone, &folio->page, pfn,
+                                             order, FPI_NONE);
                                continue;
                        }
                        locked_zone = zone;
@@ -2623,6 +2711,7 @@ void split_page(struct page *page, unsigned int order)
        for (i = 1; i < (1 << order); i++)
                set_page_refcounted(page + i);
        split_page_owner(page, order, 0);
+       pgalloc_tag_split(page, 1 << order);
        split_page_memcg(page, order, 0);
 }
 EXPORT_SYMBOL_GPL(split_page);
@@ -2643,11 +2732,9 @@ int __isolate_free_page(struct page *page, unsigned int order)
                watermark = zone->_watermark[WMARK_MIN] + (1UL << order);
                if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
                        return 0;
-
-               __mod_zone_freepage_state(zone, -(1UL << order), mt);
        }
 
-       del_page_from_free_list(page, zone, order);
+       del_page_from_free_list(page, zone, order, mt);
 
        /*
         * Set the pageblock if the isolated page is at least half of a
@@ -2662,8 +2749,8 @@ int __isolate_free_page(struct page *page, unsigned int order)
                         * with others)
                         */
                        if (migratetype_is_mergeable(mt))
-                               set_pageblock_migratetype(page,
-                                                         MIGRATE_MOVABLE);
+                               move_freepages_block(zone, page, mt,
+                                                    MIGRATE_MOVABLE);
                }
        }
 
@@ -2747,8 +2834,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
                                return NULL;
                        }
                }
-               __mod_zone_freepage_state(zone, -(1 << order),
-                                         get_pcppage_migratetype(page));
                spin_unlock_irqrestore(&zone->lock, flags);
        } while (check_new_pages(page, order));
 
@@ -4384,7 +4469,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
  *
  * Returns the number of pages on the list or array.
  */
-unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
+unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
                        nodemask_t *nodemask, int nr_pages,
                        struct list_head *page_list,
                        struct page **page_array)
@@ -4520,7 +4605,7 @@ failed_irq:
        pcp_trylock_finish(UP_flags);
 
 failed:
-       page = __alloc_pages(gfp, 0, preferred_nid, nodemask);
+       page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
        if (page) {
                if (page_list)
                        list_add(&page->lru, page_list);
@@ -4531,13 +4616,13 @@ failed:
 
        goto out;
 }
-EXPORT_SYMBOL_GPL(__alloc_pages_bulk);
+EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
 
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
-                                                       nodemask_t *nodemask)
+struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
+                                     int preferred_nid, nodemask_t *nodemask)
 {
        struct page *page;
        unsigned int alloc_flags = ALLOC_WMARK_LOW;
@@ -4599,38 +4684,38 @@ out:
 
        return page;
 }
-EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__alloc_pages_noprof);
 
-struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
                nodemask_t *nodemask)
 {
-       struct page *page = __alloc_pages(gfp | __GFP_COMP, order,
+       struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order,
                                        preferred_nid, nodemask);
        return page_rmappable_folio(page);
 }
-EXPORT_SYMBOL(__folio_alloc);
+EXPORT_SYMBOL(__folio_alloc_noprof);
 
 /*
  * Common helper functions. Never use with __GFP_HIGHMEM because the returned
  * address cannot represent highmem pages. Use alloc_pages and then kmap if
  * you need to access high mem.
  */
-unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
+unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order)
 {
        struct page *page;
 
-       page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
+       page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order);
        if (!page)
                return 0;
        return (unsigned long) page_address(page);
 }
-EXPORT_SYMBOL(__get_free_pages);
+EXPORT_SYMBOL(get_free_pages_noprof);
 
-unsigned long get_zeroed_page(gfp_t gfp_mask)
+unsigned long get_zeroed_page_noprof(gfp_t gfp_mask)
 {
-       return __get_free_page(gfp_mask | __GFP_ZERO);
+       return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0);
 }
-EXPORT_SYMBOL(get_zeroed_page);
+EXPORT_SYMBOL(get_zeroed_page_noprof);
 
 /**
  * __free_pages - Free pages allocated with alloc_pages().
@@ -4656,12 +4741,15 @@ void __free_pages(struct page *page, unsigned int order)
 {
        /* get PageHead before we drop reference */
        int head = PageHead(page);
+       struct alloc_tag *tag = pgalloc_tag_get(page);
 
        if (put_page_testzero(page))
-               free_the_page(page, order);
-       else if (!head)
+               free_unref_page(page, order);
+       else if (!head) {
+               pgalloc_tag_sub_pages(tag, (1 << order) - 1);
                while (order-- > 0)
-                       free_the_page(page + (1 << order), order);
+                       free_unref_page(page + (1 << order), order);
+       }
 }
 EXPORT_SYMBOL(__free_pages);
 
@@ -4722,7 +4810,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
        VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
 
        if (page_ref_sub_and_test(page, count))
-               free_the_page(page, compound_order(page));
+               free_unref_page(page, compound_order(page));
 }
 EXPORT_SYMBOL(__page_frag_cache_drain);
 
@@ -4763,7 +4851,7 @@ refill:
                        goto refill;
 
                if (unlikely(nc->pfmemalloc)) {
-                       free_the_page(page, compound_order(page));
+                       free_unref_page(page, compound_order(page));
                        goto refill;
                }
 
@@ -4807,7 +4895,7 @@ void page_frag_free(void *addr)
        struct page *page = virt_to_head_page(addr);
 
        if (unlikely(put_page_testzero(page)))
-               free_the_page(page, compound_order(page));
+               free_unref_page(page, compound_order(page));
 }
 EXPORT_SYMBOL(page_frag_free);
 
@@ -4820,6 +4908,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
                struct page *last = page + nr;
 
                split_page_owner(page, order, 0);
+               pgalloc_tag_split(page, 1 << order);
                split_page_memcg(page, order, 0);
                while (page < --last)
                        set_page_refcounted(last);
@@ -4846,7 +4935,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
  *
  * Return: pointer to the allocated area or %NULL in case of error.
  */
-void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
+void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask)
 {
        unsigned int order = get_order(size);
        unsigned long addr;
@@ -4854,10 +4943,10 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask)
        if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
                gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
-       addr = __get_free_pages(gfp_mask, order);
+       addr = get_free_pages_noprof(gfp_mask, order);
        return make_alloc_exact(addr, order, size);
 }
-EXPORT_SYMBOL(alloc_pages_exact);
+EXPORT_SYMBOL(alloc_pages_exact_noprof);
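/*
 * Usage sketch (illustrative, hypothetical function): alloc_pages_exact()
 * rounds the request up to whole pages but, unlike a plain higher-order
 * allocation, hands the unused tail of the power-of-two block back to the
 * allocator. Free with free_pages_exact() and the same size.
 */
static int example_alloc_exact(void)
{
	size_t size = 5 * PAGE_SIZE;	/* order 3 internally, 3 pages given back */
	void *buf = alloc_pages_exact(size, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	/* buf points at exactly 5 physically contiguous lowmem pages */

	free_pages_exact(buf, size);
	return 0;
}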
 
 /**
  * alloc_pages_exact_nid - allocate an exact number of physically-contiguous
@@ -4871,7 +4960,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  *
  * Return: pointer to the allocated area or %NULL in case of error.
  */
-void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask)
 {
        unsigned int order = get_order(size);
        struct page *p;
@@ -4879,7 +4968,7 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
        if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM)))
                gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM);
 
-       p = alloc_pages_node(nid, gfp_mask, order);
+       p = alloc_pages_node_noprof(nid, gfp_mask, order);
        if (!p)
                return NULL;
        return make_alloc_exact((unsigned long)page_address(p), order, size);
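/*
 * Worked example for the make_alloc_exact() path shared by both helpers
 * (assuming 4 KiB pages): a request for 5 * PAGE_SIZE maps to
 * get_order() == 3, i.e. an order-3 (8-page) allocation. split_page_owner(),
 * pgalloc_tag_split() and split_page_memcg() turn it into independent
 * order-0 pages, and the trailing 3 pages beyond the requested size are
 * freed back, so the caller ends up owning exactly 5 contiguous pages.
 */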
@@ -5180,37 +5269,13 @@ static void setup_min_slab_ratio(void);
 
 static void build_zonelists(pg_data_t *pgdat)
 {
-       int node, local_node;
        struct zoneref *zonerefs;
        int nr_zones;
 
-       local_node = pgdat->node_id;
-
        zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
        nr_zones = build_zonerefs_node(pgdat, zonerefs);
        zonerefs += nr_zones;
 
-       /*
-        * Now we build the zonelist so that it contains the zones
-        * of all the other nodes.
-        * We don't want to pressure a particular node, so when
-        * building the zones for node N, we make sure that the
-        * zones coming right after the local ones are those from
-        * node N+1 (modulo N)
-        */
-       for (node = local_node + 1; node < MAX_NUMNODES; node++) {
-               if (!node_online(node))
-                       continue;
-               nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
-               zonerefs += nr_zones;
-       }
-       for (node = 0; node < local_node; node++) {
-               if (!node_online(node))
-                       continue;
-               nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs);
-               zonerefs += nr_zones;
-       }
-
        zonerefs->zone = NULL;
        zonerefs->zone_idx = 0;
 }
@@ -5827,10 +5892,11 @@ static void setup_per_zone_lowmem_reserve(void)
 
                        for (j = i + 1; j < MAX_NR_ZONES; j++) {
                                struct zone *upper_zone = &pgdat->node_zones[j];
+                               bool empty = !zone_managed_pages(upper_zone);
 
                                managed_pages += zone_managed_pages(upper_zone);
 
-                               if (clear)
+                               if (clear || empty)
                                        zone->lowmem_reserve[j] = 0;
                                else
                                        zone->lowmem_reserve[j] = managed_pages / ratio;
@@ -6211,7 +6277,6 @@ static struct ctl_table page_alloc_sysctl_table[] = {
                .extra2         = SYSCTL_ONE_HUNDRED,
        },
 #endif
-       {}
 };
 
 void __init page_alloc_sysctl_init(void)
@@ -6251,6 +6316,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
        struct migration_target_control mtc = {
                .nid = zone_to_nid(cc->zone),
                .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+               .reason = MR_CONTIG_RANGE,
        };
        struct page *page;
        unsigned long total_mapped = 0;
@@ -6283,8 +6349,12 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
 
                if (trace_mm_alloc_contig_migrate_range_info_enabled()) {
                        total_reclaimed += nr_reclaimed;
-                       list_for_each_entry(page, &cc->migratepages, lru)
-                               total_mapped += page_mapcount(page);
+                       list_for_each_entry(page, &cc->migratepages, lru) {
+                               struct folio *folio = page_folio(page);
+
+                               total_mapped += folio_mapped(folio) *
+                                               folio_nr_pages(folio);
+                       }
                }
 
                ret = migrate_pages(&cc->migratepages, alloc_migration_target,
@@ -6336,11 +6406,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
  * pages whose PFN is in [start, end) are allocated for the caller and
  * need to be freed with free_contig_range().
  */
-int alloc_contig_range(unsigned long start, unsigned long end,
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
                       unsigned migratetype, gfp_t gfp_mask)
 {
        unsigned long outer_start, outer_end;
-       int order;
        int ret = 0;
 
        struct compact_control cc = {
@@ -6413,29 +6482,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
         * We don't have to hold zone->lock here because the pages are
         * isolated thus they won't get removed from buddy.
         */
-
-       order = 0;
-       outer_start = start;
-       while (!PageBuddy(pfn_to_page(outer_start))) {
-               if (++order > MAX_PAGE_ORDER) {
-                       outer_start = start;
-                       break;
-               }
-               outer_start &= ~0UL << order;
-       }
-
-       if (outer_start != start) {
-               order = buddy_order(pfn_to_page(outer_start));
-
-               /*
-                * outer_start page could be small order buddy page and
-                * it doesn't include start page. Adjust outer_start
-                * in this case to report failed page properly
-                * on tracepoint in test_pages_isolated()
-                */
-               if (outer_start + (1UL << order) <= start)
-                       outer_start = start;
-       }
+       outer_start = find_large_buddy(start);
 
        /* Make sure the range is really isolated. */
        if (test_pages_isolated(outer_start, end, 0)) {
@@ -6460,15 +6507,15 @@ done:
        undo_isolate_page_range(start, end, migratetype);
        return ret;
 }
-EXPORT_SYMBOL(alloc_contig_range);
+EXPORT_SYMBOL(alloc_contig_range_noprof);
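/*
 * Usage sketch (illustrative, hypothetical caller): alloc_contig_range() is
 * expected to remain the caller-facing wrapper around the _noprof variant.
 * The pfn range should sit in a MIGRATE_MOVABLE or MIGRATE_CMA area and is
 * released with free_contig_range().
 */
static int example_contig(unsigned long start_pfn)
{
	unsigned long nr_pages = 1UL << pageblock_order;
	int ret;

	ret = alloc_contig_range(start_pfn, start_pfn + nr_pages,
				 MIGRATE_MOVABLE, GFP_KERNEL);
	if (ret)
		return ret;

	/* pfns [start_pfn, start_pfn + nr_pages) now belong to the caller */

	free_contig_range(start_pfn, nr_pages);
	return 0;
}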
 
 static int __alloc_contig_pages(unsigned long start_pfn,
                                unsigned long nr_pages, gfp_t gfp_mask)
 {
        unsigned long end_pfn = start_pfn + nr_pages;
 
-       return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE,
-                                 gfp_mask);
+       return alloc_contig_range_noprof(start_pfn, end_pfn, MIGRATE_MOVABLE,
+                                  gfp_mask);
 }
 
 static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
@@ -6523,8 +6570,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
  *
  * Return: pointer to contiguous pages on success, or NULL if not successful.
  */
-struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
-                               int nid, nodemask_t *nodemask)
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+                                int nid, nodemask_t *nodemask)
 {
        unsigned long ret, pfn, flags;
        struct zonelist *zonelist;
@@ -6655,8 +6702,9 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 
                BUG_ON(page_count(page));
                BUG_ON(!PageBuddy(page));
+               VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE);
                order = buddy_order(page);
-               del_page_from_free_list(page, zone, order);
+               del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE);
                pfn += (1 << order);
        }
        spin_unlock_irqrestore(&zone->lock, flags);
@@ -6666,16 +6714,16 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 /*
  * This function returns a stable result only if called under zone lock.
  */
-bool is_free_buddy_page(struct page *page)
+bool is_free_buddy_page(const struct page *page)
 {
        unsigned long pfn = page_to_pfn(page);
        unsigned int order;
 
        for (order = 0; order < NR_PAGE_ORDERS; order++) {
-               struct page *page_head = page - (pfn & ((1 << order) - 1));
+               const struct page *head = page - (pfn & ((1 << order) - 1));
 
-               if (PageBuddy(page_head) &&
-                   buddy_order_unsafe(page_head) >= order)
+               if (PageBuddy(head) &&
+                   buddy_order_unsafe(head) >= order)
                        break;
        }
 
@@ -6684,6 +6732,14 @@ bool is_free_buddy_page(struct page *page)
 EXPORT_SYMBOL(is_free_buddy_page);
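/*
 * Sketch of the locking the comment above refers to (hypothetical helper):
 * take the zone lock around the check to get a stable answer.
 */
static bool example_is_free_buddy_stable(const struct page *page)
{
	struct zone *zone = page_zone(page);
	unsigned long flags;
	bool free;

	spin_lock_irqsave(&zone->lock, flags);
	free = is_free_buddy_page(page);
	spin_unlock_irqrestore(&zone->lock, flags);

	return free;
}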
 
 #ifdef CONFIG_MEMORY_FAILURE
+static inline void add_to_free_list(struct page *page, struct zone *zone,
+                                   unsigned int order, int migratetype,
+                                   bool tail)
+{
+       __add_to_free_list(page, zone, order, migratetype, tail);
+       account_freepages(zone, 1 << order, migratetype);
+}
+
 /*
  * Break down a higher-order page in sub-pages, and keep our target out of
  * buddy allocator.
@@ -6706,10 +6762,10 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page,
                        current_buddy = page + size;
                }
 
-               if (set_page_guard(zone, current_buddy, high, migratetype))
+               if (set_page_guard(zone, current_buddy, high))
                        continue;
 
-               add_to_free_list(current_buddy, zone, high, migratetype);
+               add_to_free_list(current_buddy, zone, high, migratetype, false);
                set_buddy_order(current_buddy, high);
        }
 }
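/*
 * Worked example: for an order-3 buddy (pages 0-7) whose hwpoisoned target
 * is page 5, each iteration frees the half that does not contain the target:
 * pages 0-3 go back as an order-2 block, pages 6-7 as order-1, page 4 as
 * order-0, and only page 5 is left out of the buddy allocator for
 * take_page_off_buddy() to isolate.
 */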
@@ -6735,12 +6791,11 @@ bool take_page_off_buddy(struct page *page)
                        int migratetype = get_pfnblock_migratetype(page_head,
                                                                   pfn_head);
 
-                       del_page_from_free_list(page_head, zone, page_order);
+                       del_page_from_free_list(page_head, zone, page_order,
+                                               migratetype);
                        break_down_buddy_pages(zone, page_head, page, 0,
                                                page_order, migratetype);
                        SetPageHWPoisonTakenOff(page);
-                       if (!is_migrate_isolate(migratetype))
-                               __mod_zone_freepage_state(zone, -1, migratetype);
                        ret = true;
                        break;
                }
@@ -6757,13 +6812,14 @@ bool take_page_off_buddy(struct page *page)
 bool put_page_back_buddy(struct page *page)
 {
        struct zone *zone = page_zone(page);
-       unsigned long pfn = page_to_pfn(page);
        unsigned long flags;
-       int migratetype = get_pfnblock_migratetype(page, pfn);
        bool ret = false;
 
        spin_lock_irqsave(&zone->lock, flags);
        if (put_page_testzero(page)) {
+               unsigned long pfn = page_to_pfn(page);
+               int migratetype = get_pfnblock_migratetype(page, pfn);
+
                ClearPageHWPoisonTakenOff(page);
                __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE);
                if (TestClearPageHWPoison(page)) {
@@ -6847,7 +6903,7 @@ static bool try_to_accept_memory_one(struct zone *zone)
        list_del(&page->lru);
        last = list_empty(&zone->unaccepted_pages);
 
-       __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+       account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
        __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
        spin_unlock_irqrestore(&zone->lock, flags);
 
@@ -6899,7 +6955,7 @@ static bool __free_unaccepted(struct page *page)
        spin_lock_irqsave(&zone->lock, flags);
        first = list_empty(&zone->unaccepted_pages);
        list_add_tail(&page->lru, &zone->unaccepted_pages);
-       __mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+       account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
        __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
        spin_unlock_irqrestore(&zone->lock, flags);