diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3d9f4abec17c61e6af64d51ef74612e908371aca..77f36e3681e390dc7f1382cd4dc9bca84b09a611 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE];
 #ifdef CONFIG_CMA
 static struct cma *hugetlb_cma[MAX_NUMNODES];
 static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
-       return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
+       return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
                                1 << order);
 }
 #else
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
        return false;
 }
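
The conversions throughout this patch follow one pattern: a struct page and
its page_*() accessors are replaced by the containing struct folio and the
matching folio_*() helpers.  A minimal sketch of that mapping, assuming an
ordinary kernel context (the function name is illustrative, not part of the
patch):

    #include <linux/mm.h>

    /* Illustrative only: how the page-based lookups in the old code map
     * onto the folio helpers used by the new code. */
    static bool example_folio_accessors(struct page *page)
    {
            struct folio *folio = page_folio(page); /* head page as a folio */

            /* page_to_nid(page) -> folio_nid(folio)       */
            /* page_to_pfn(page) -> folio_pfn(folio)       */
            /* page_count(page)  -> folio_ref_count(folio) */
            return folio_nid(folio) == page_to_nid(page) &&
                   folio_pfn(folio) == page_to_pfn(compound_head(page));
    }
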
@@ -1127,17 +1127,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
        return false;
 }
 
-static void enqueue_huge_page(struct hstate *h, struct page *page)
+static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
        lockdep_assert_held(&hugetlb_lock);
-       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-       list_move(&page->lru, &h->hugepage_freelists[nid]);
+       list_move(&folio->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;
-       SetHPageFreed(page);
+       folio_set_hugetlb_freed(folio);
 }
 
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1325,76 +1325,76 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                nr_nodes--)
 
 /* used to demote non-gigantic_huge pages as well */
-static void __destroy_compound_gigantic_page(struct page *page,
+static void __destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order, bool demote)
 {
        int i;
        int nr_pages = 1 << order;
        struct page *p;
 
-       atomic_set(compound_mapcount_ptr(page), 0);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), 0);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
 
        for (i = 1; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
                p->mapping = NULL;
                clear_compound_head(p);
                if (!demote)
                        set_page_refcounted(p);
        }
 
-       set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
-#endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
 }
 
-static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
                                        unsigned int order)
 {
-       __destroy_compound_gigantic_page(page, order, true);
+       __destroy_compound_gigantic_folio(folio, order, true);
 }
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_page(struct page *page,
+static void destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order)
 {
-       __destroy_compound_gigantic_page(page, order, false);
+       __destroy_compound_gigantic_folio(folio, order, false);
 }
 
-static void free_gigantic_page(struct page *page, unsigned int order)
+static void free_gigantic_folio(struct folio *folio, unsigned int order)
 {
        /*
         * If the page isn't allocated using the cma allocator,
         * cma_release() returns false.
         */
 #ifdef CONFIG_CMA
-       if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+       int nid = folio_nid(folio);
+
+       if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
                return;
 #endif
 
-       free_contig_range(page_to_pfn(page), 1 << order);
+       free_contig_range(folio_pfn(folio), 1 << order);
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                int nid, nodemask_t *nodemask)
 {
+       struct page *page;
        unsigned long nr_pages = pages_per_huge_page(h);
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
 
 #ifdef CONFIG_CMA
        {
-               struct page *page;
                int node;
 
                if (hugetlb_cma[nid]) {
                        page = cma_alloc(hugetlb_cma[nid], nr_pages,
                                        huge_page_order(h), true);
                        if (page)
-                               return page;
+                               return page_folio(page);
                }
 
                if (!(gfp_mask & __GFP_THISNODE)) {
@@ -1405,17 +1405,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
                                page = cma_alloc(hugetlb_cma[node], nr_pages,
                                                huge_page_order(h), true);
                                if (page)
-                                       return page;
+                                       return page_folio(page);
                        }
                }
        }
 #endif
 
-       return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       return page ? page_folio(page) : NULL;
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
 {
        return NULL;
@@ -1423,40 +1424,41 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
 {
        return NULL;
 }
-static inline void free_gigantic_page(struct page *page, unsigned int order) { }
-static inline void destroy_compound_gigantic_page(struct page *page,
+static inline void free_gigantic_folio(struct folio *folio,
+                                               unsigned int order) { }
+static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
 #endif
 
 /*
- * Remove hugetlb page from lists, and update dtor so that page appears
+ * Remove hugetlb folio from lists, and update dtor so that the folio appears
  * as just a compound page.
  *
- * A reference is held on the page, except in the case of demote.
+ * A reference is held on the folio, except in the case of demote.
  *
  * Must be called with hugetlb lock held.
  */
-static void __remove_hugetlb_page(struct hstate *h, struct page *page,
+static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus,
                                                        bool demote)
 {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
 
        lockdep_assert_held(&hugetlb_lock);
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
 
-       list_del(&page->lru);
+       list_del(&folio->lru);
 
-       if (HPageFreed(page)) {
+       if (folio_test_hugetlb_freed(folio)) {
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
        }
@@ -1475,50 +1477,50 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
         *
         * For gigantic pages set the destructor to the null dtor.  This
         * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_page will turn the compound page
-        * into a simple group of pages.  After this the destructor does not
+        * page destroy_compound_gigantic_folio will turn the folio into a
+        * simple group of pages.  After this the destructor does not
         * apply.
         *
         * This handles the case where more than one ref is held when and
-        * after update_and_free_page is called.
+        * after update_and_free_hugetlb_folio is called.
         *
         * In the case of demote we do not ref count the page as it will soon
         * be turned into a page of smaller size.
         */
        if (!demote)
-               set_page_refcounted(page);
+               folio_ref_unfreeze(folio, 1);
        if (hstate_is_gigantic(h))
-               set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
        else
-               set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
 }
 
-static void remove_hugetlb_page(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
 {
-       __remove_hugetlb_page(h, page, adjust_surplus, false);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, false);
 }
 
-static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
 {
-       __remove_hugetlb_page(h, page, adjust_surplus, true);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, true);
 }
 
-static void add_hugetlb_page(struct hstate *h, struct page *page,
+static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
                             bool adjust_surplus)
 {
        int zeroed;
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
-       VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page);
+       VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
 
        lockdep_assert_held(&hugetlb_lock);
 
-       INIT_LIST_HEAD(&page->lru);
+       INIT_LIST_HEAD(&folio->lru);
        h->nr_huge_pages++;
        h->nr_huge_pages_node[nid]++;
 
@@ -1527,21 +1529,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
                h->surplus_huge_pages_node[nid]++;
        }
 
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       set_page_private(page, 0);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       folio_change_private(folio, NULL);
        /*
-        * We have to set HPageVmemmapOptimized again as above
-        * set_page_private(page, 0) cleared it.
+        * We have to set hugetlb_vmemmap_optimized again as above
+        * folio_change_private(folio, NULL) cleared it.
         */
-       SetHPageVmemmapOptimized(page);
+       folio_set_hugetlb_vmemmap_optimized(folio);
 
        /*
-        * This page is about to be managed by the hugetlb allocator and
+        * This folio is about to be managed by the hugetlb allocator and
         * should have no users.  Drop our reference, and check for others
         * just in case.
         */
-       zeroed = put_page_testzero(page);
-       if (!zeroed)
+       zeroed = folio_put_testzero(folio);
+       if (unlikely(!zeroed))
                /*
                 * It is VERY unlikely someone else has taken a ref on
                 * the page.  In this case, we simply return as the
@@ -1550,13 +1552,14 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
                 */
                return;
 
-       arch_clear_hugepage_flags(page);
-       enqueue_huge_page(h, page);
+       arch_clear_hugepage_flags(&folio->page);
+       enqueue_hugetlb_folio(h, folio);
 }
 
 static void __update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
@@ -1566,7 +1569,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
         * If we don't know which subpages are hwpoisoned, we can't free
         * the hugepage, so it's leaked intentionally.
         */
-       if (HPageRawHwpUnreliable(page))
+       if (folio_test_hugetlb_raw_hwp_unreliable(folio))
                return;
 
        if (hugetlb_vmemmap_restore(h, page)) {
@@ -1576,7 +1579,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
                 * page and put the page back on the hugetlb free list and treat
                 * as a surplus page.
                 */
-               add_hugetlb_page(h, page, true);
+               add_hugetlb_folio(h, folio, true);
                spin_unlock_irq(&hugetlb_lock);
                return;
        }
@@ -1585,11 +1588,11 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
         * Move PageHWPoison flag from head page to the raw error pages,
         * which makes any healthy subpages reusable.
         */
-       if (unlikely(PageHWPoison(page)))
-               hugetlb_clear_page_hwpoison(page);
+       if (unlikely(folio_test_hwpoison(folio)))
+               hugetlb_clear_page_hwpoison(&folio->page);
 
        for (i = 0; i < pages_per_huge_page(h); i++) {
-               subpage = nth_page(page, i);
+               subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
                                1 << PG_referenced | 1 << PG_dirty |
                                1 << PG_active | 1 << PG_private |
@@ -1598,19 +1601,19 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 
        /*
         * Non-gigantic pages demoted from CMA allocated gigantic pages
-        * need to be given back to CMA in free_gigantic_page.
+        * need to be given back to CMA in free_gigantic_folio.
         */
        if (hstate_is_gigantic(h) ||
-           hugetlb_cma_page(page, huge_page_order(h))) {
-               destroy_compound_gigantic_page(page, huge_page_order(h));
-               free_gigantic_page(page, huge_page_order(h));
+           hugetlb_cma_folio(folio, huge_page_order(h))) {
+               destroy_compound_gigantic_folio(folio, huge_page_order(h));
+               free_gigantic_folio(folio, huge_page_order(h));
        } else {
                __free_pages(page, huge_page_order(h));
        }
 }
 
 /*
- * As update_and_free_page() can be called under any context, so we cannot
+ * As update_and_free_hugetlb_folio() can be called under any context, we cannot
  * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the
  * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate
  * the vmemmap pages.
@@ -1639,8 +1642,9 @@ static void free_hpage_workfn(struct work_struct *work)
                /*
                 * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate()
                 * is going to trigger because a previous call to
-                * remove_hugetlb_page() will set_compound_page_dtor(page,
-                * NULL_COMPOUND_DTOR), so do not use page_hstate() directly.
+                * remove_hugetlb_folio() will call folio_set_compound_dtor
+                * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate()
+                * directly.
                 */
                h = size_to_hstate(page_size(page));
 
@@ -1657,11 +1661,11 @@ static inline void flush_free_hpage_work(struct hstate *h)
                flush_work(&free_hpage_work);
 }
 
-static void update_and_free_page(struct hstate *h, struct page *page,
+static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio,
                                 bool atomic)
 {
-       if (!HPageVmemmapOptimized(page) || !atomic) {
-               __update_and_free_page(h, page);
+       if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) {
+               __update_and_free_page(h, &folio->page);
                return;
        }
 
@@ -1672,16 +1676,18 @@ static void update_and_free_page(struct hstate *h, struct page *page,
         * empty. Otherwise, schedule_work() had been called but the workfn
         * hasn't retrieved the list yet.
         */
-       if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
+       if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
                schedule_work(&free_hpage_work);
 }
 
 static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
 {
        struct page *page, *t_page;
+       struct folio *folio;
 
        list_for_each_entry_safe(page, t_page, list, lru) {
-               update_and_free_page(h, page, false);
+               folio = page_folio(page);
+               update_and_free_hugetlb_folio(h, folio, false);
                cond_resched();
        }
 }
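
The deferral above exists because update_and_free_hugetlb_folio() may run in
atomic context while restoring vmemmap pages wants GFP_KERNEL, so the actual
freeing is punted to process context; in the patch the llist_node storage is
overlaid on folio->mapping.  A hedged sketch of the underlying
llist-plus-workqueue idiom (the names below are illustrative, not taken from
the kernel):

    #include <linux/llist.h>
    #include <linux/workqueue.h>

    static LLIST_HEAD(deferred_list);

    static void deferred_workfn(struct work_struct *work)
    {
            /* Grab the whole batch atomically, then process it unlocked. */
            struct llist_node *node = llist_del_all(&deferred_list);

            while (node) {
                    struct llist_node *next = node->next;

                    /* ... free or otherwise process the item here ... */
                    node = next;
            }
    }
    static DECLARE_WORK(deferred_work, deferred_workfn);

    static void defer_item(struct llist_node *item)
    {
            /* llist_add() returns true only when the list was empty, so the
             * work is scheduled once per batch. */
            if (llist_add(item, &deferred_list))
                    schedule_work(&deferred_work);
    }
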
@@ -1703,21 +1709,22 @@ void free_huge_page(struct page *page)
         * Can't pass hstate in here because it is called from the
         * compound page destructor.
         */
-       struct hstate *h = page_hstate(page);
-       int nid = page_to_nid(page);
-       struct hugepage_subpool *spool = hugetlb_page_subpool(page);
+       struct folio *folio = page_folio(page);
+       struct hstate *h = folio_hstate(folio);
+       int nid = folio_nid(folio);
+       struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);
        bool restore_reserve;
        unsigned long flags;
 
-       VM_BUG_ON_PAGE(page_count(page), page);
-       VM_BUG_ON_PAGE(page_mapcount(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+       VM_BUG_ON_FOLIO(folio_mapcount(folio), folio);
 
-       hugetlb_set_page_subpool(page, NULL);
-       if (PageAnon(page))
-               __ClearPageAnonExclusive(page);
-       page->mapping = NULL;
-       restore_reserve = HPageRestoreReserve(page);
-       ClearHPageRestoreReserve(page);
+       hugetlb_set_folio_subpool(folio, NULL);
+       if (folio_test_anon(folio))
+               __ClearPageAnonExclusive(&folio->page);
+       folio->mapping = NULL;
+       restore_reserve = folio_test_hugetlb_restore_reserve(folio);
+       folio_clear_hugetlb_restore_reserve(folio);
 
        /*
         * If HPageRestoreReserve was set on page, page allocation consumed a
@@ -1739,26 +1746,26 @@ void free_huge_page(struct page *page)
        }
 
        spin_lock_irqsave(&hugetlb_lock, flags);
-       ClearHPageMigratable(page);
-       hugetlb_cgroup_uncharge_page(hstate_index(h),
-                                    pages_per_huge_page(h), page);
-       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                         pages_per_huge_page(h), page);
+       folio_clear_hugetlb_migratable(folio);
+       hugetlb_cgroup_uncharge_folio(hstate_index(h),
+                                    pages_per_huge_page(h), folio);
+       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                         pages_per_huge_page(h), folio);
        if (restore_reserve)
                h->resv_huge_pages++;
 
-       if (HPageTemporary(page)) {
-               remove_hugetlb_page(h, page, false);
+       if (folio_test_hugetlb_temporary(folio)) {
+               remove_hugetlb_folio(h, folio, false);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
-               remove_hugetlb_page(h, page, true);
+               remove_hugetlb_folio(h, folio, true);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else {
                arch_clear_hugepage_flags(page);
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, folio);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
        }
 }
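
free_huge_page() is rarely called directly: it is installed as the folio's
compound destructor (HUGETLB_PAGE_DTOR), so it runs when the last reference
is dropped.  A minimal caller-side sketch, assuming a kernel context:

    #include <linux/mm.h>

    /* Illustrative only: the final folio_put() on a hugetlb folio invokes
     * the compound destructor, i.e. free_huge_page() above. */
    static void drop_hugetlb_ref(struct folio *folio)
    {
            folio_put(folio);
    }
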
@@ -1773,37 +1780,37 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
        h->nr_huge_pages_node[nid]++;
 }
 
-static void __prep_new_huge_page(struct hstate *h, struct page *page)
+static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-       hugetlb_vmemmap_optimize(h, page);
-       INIT_LIST_HEAD(&page->lru);
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       hugetlb_set_page_subpool(page, NULL);
-       set_hugetlb_cgroup(page, NULL);
-       set_hugetlb_cgroup_rsvd(page, NULL);
+       hugetlb_vmemmap_optimize(h, &folio->page);
+       INIT_LIST_HEAD(&folio->lru);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       hugetlb_set_folio_subpool(folio, NULL);
+       set_hugetlb_cgroup(folio, NULL);
+       set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
 {
-       __prep_new_huge_page(h, page);
+       __prep_new_hugetlb_folio(h, folio);
        spin_lock_irq(&hugetlb_lock);
        __prep_account_new_huge_page(h, nid);
        spin_unlock_irq(&hugetlb_lock);
 }
 
-static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
-                                                               bool demote)
+static bool __prep_compound_gigantic_folio(struct folio *folio,
+                                       unsigned int order, bool demote)
 {
        int i, j;
        int nr_pages = 1 << order;
        struct page *p;
 
-       /* we rely on prep_new_huge_page to set the destructor */
-       set_compound_order(page, order);
-       __ClearPageReserved(page);
-       __SetPageHead(page);
+       __folio_clear_reserved(folio);
+       __folio_set_head(folio);
+       /* we rely on prep_new_hugetlb_folio to set the destructor */
+       folio_set_compound_order(folio, order);
        for (i = 0; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
 
                /*
                 * For gigantic hugepages allocated through bootmem at
@@ -1845,42 +1852,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
                        VM_BUG_ON_PAGE(page_count(p), p);
                }
                if (i != 0)
-                       set_compound_head(p, page);
+                       set_compound_head(p, &folio->page);
        }
-       atomic_set(compound_mapcount_ptr(page), -1);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), -1);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
        return true;
 
 out_error:
        /* undo page modifications made above */
        for (j = 0; j < i; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                if (j != 0)
                        clear_compound_head(p);
                set_page_refcounted(p);
        }
        /* need to clear PG_reserved on remaining tail pages  */
        for (; j < nr_pages; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                __ClearPageReserved(p);
        }
-       set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
-#endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
        return false;
 }
 
-static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
+static bool prep_compound_gigantic_folio(struct folio *folio,
+                                                       unsigned int order)
 {
-       return __prep_compound_gigantic_page(page, order, false);
+       return __prep_compound_gigantic_folio(folio, order, false);
 }
 
-static bool prep_compound_gigantic_page_for_demote(struct page *page,
+static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
                                                        unsigned int order)
 {
-       return __prep_compound_gigantic_page(page, order, true);
+       return __prep_compound_gigantic_folio(folio, order, true);
 }
 
 /*
@@ -1945,7 +1951,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
        return (index << compound_order(page_head)) + compound_idx;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
+static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
 {
@@ -1983,11 +1989,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
                page = NULL;
        }
 
-       if (page)
-               __count_vm_event(HTLB_BUDDY_PGALLOC);
-       else
-               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
        /*
         * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
         * indicates an overall state change.  Clear bit so that we resume
@@ -2004,7 +2005,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
        if (node_alloc_noretry && !page && alloc_try_hard)
                node_set(nid, *node_alloc_noretry);
 
-       return page;
+       if (!page) {
+               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+               return NULL;
+       }
+
+       __count_vm_event(HTLB_BUDDY_PGALLOC);
+       return page_folio(page);
 }
 
 /*
@@ -2014,29 +2021,28 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
  * Note that returned page is 'frozen':  ref count of head page and all tail
  * pages is zero.
  */
-static struct page *alloc_fresh_huge_page(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
 {
-       struct page *page;
+       struct folio *folio;
        bool retry = false;
 
 retry:
        if (hstate_is_gigantic(h))
-               page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+               folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
        else
-               page = alloc_buddy_huge_page(h, gfp_mask,
+               folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
                                nid, nmask, node_alloc_noretry);
-       if (!page)
+       if (!folio)
                return NULL;
-
        if (hstate_is_gigantic(h)) {
-               if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
+               if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
                        /*
                         * Rare failure to convert pages to compound page.
                         * Free pages and try again - ONCE!
                         */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                        if (!retry) {
                                retry = true;
                                goto retry;
@@ -2044,9 +2050,9 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
                        return NULL;
                }
        }
-       prep_new_huge_page(h, page, page_to_nid(page));
+       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 
-       return page;
+       return folio;
 }
 
 /*
@@ -2056,23 +2062,20 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
 static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
                                nodemask_t *node_alloc_noretry)
 {
-       struct page *page;
+       struct folio *folio;
        int nr_nodes, node;
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
-                                               node_alloc_noretry);
-               if (page)
-                       break;
+               folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+                                       nodes_allowed, node_alloc_noretry);
+               if (folio) {
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
+                       return 1;
+               }
        }
 
-       if (!page)
-               return 0;
-
-       free_huge_page(page); /* free it into the hugepage allocator */
-
-       return 1;
+       return 0;
 }
 
 /*
@@ -2088,6 +2091,7 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 {
        int nr_nodes, node;
        struct page *page = NULL;
+       struct folio *folio;
 
        lockdep_assert_held(&hugetlb_lock);
        for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
@@ -2099,7 +2103,8 @@ static struct page *remove_pool_huge_page(struct hstate *h,
                    !list_empty(&h->hugepage_freelists[node])) {
                        page = list_entry(h->hugepage_freelists[node].next,
                                          struct page, lru);
-                       remove_hugetlb_page(h, page, acct_surplus);
+                       folio = page_folio(page);
+                       remove_hugetlb_folio(h, folio, acct_surplus);
                        break;
                }
        }
@@ -2124,21 +2129,21 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 int dissolve_free_huge_page(struct page *page)
 {
        int rc = -EBUSY;
+       struct folio *folio = page_folio(page);
 
 retry:
        /* Not to disrupt normal path by vainly holding hugetlb_lock */
-       if (!PageHuge(page))
+       if (!folio_test_hugetlb(folio))
                return 0;
 
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(page)) {
+       if (!folio_test_hugetlb(folio)) {
                rc = 0;
                goto out;
        }
 
-       if (!page_count(page)) {
-               struct page *head = compound_head(page);
-               struct hstate *h = page_hstate(head);
+       if (!folio_ref_count(folio)) {
+               struct hstate *h = folio_hstate(folio);
                if (!available_huge_pages(h))
                        goto out;
 
@@ -2146,7 +2151,7 @@ int dissolve_free_huge_page(struct page *page)
                 * We should make sure that the page is already on the free list
                 * when it is dissolved.
                 */
-               if (unlikely(!HPageFreed(head))) {
+               if (unlikely(!folio_test_hugetlb_freed(folio))) {
                        spin_unlock_irq(&hugetlb_lock);
                        cond_resched();
 
@@ -2161,24 +2166,24 @@ int dissolve_free_huge_page(struct page *page)
                        goto retry;
                }
 
-               remove_hugetlb_page(h, head, false);
+               remove_hugetlb_folio(h, folio, false);
                h->max_huge_pages--;
                spin_unlock_irq(&hugetlb_lock);
 
                /*
-                * Normally update_and_free_page will allocate required vmemmmap
-                * before freeing the page.  update_and_free_page will fail to
+                * Normally update_and_free_hugetlb_folio will allocate required vmemmap
+                * before freeing the page.  update_and_free_hugetlb_folio will fail to
                 * free the page if it can not allocate required vmemmap.  We
                 * need to adjust max_huge_pages if the page is not freed.
                 * Attempt to allocate vmemmap here so that we can take
                 * appropriate action on failure.
                 */
-               rc = hugetlb_vmemmap_restore(h, head);
+               rc = hugetlb_vmemmap_restore(h, &folio->page);
                if (!rc) {
-                       update_and_free_page(h, head, false);
+                       update_and_free_hugetlb_folio(h, folio, false);
                } else {
                        spin_lock_irq(&hugetlb_lock);
-                       add_hugetlb_page(h, head, false);
+                       add_hugetlb_folio(h, folio, false);
                        h->max_huge_pages++;
                        spin_unlock_irq(&hugetlb_lock);
                }
@@ -2229,7 +2234,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                                                int nid, nodemask_t *nmask)
 {
-       struct page *page = NULL;
+       struct folio *folio = NULL;
 
        if (hstate_is_gigantic(h))
                return NULL;
@@ -2239,8 +2244,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                goto out_unlock;
        spin_unlock_irq(&hugetlb_lock);
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
 
        spin_lock_irq(&hugetlb_lock);
@@ -2252,43 +2257,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         * codeflow
         */
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-               SetHPageTemporary(page);
+               folio_set_hugetlb_temporary(folio);
                spin_unlock_irq(&hugetlb_lock);
-               free_huge_page(page);
+               free_huge_page(&folio->page);
                return NULL;
        }
 
        h->surplus_huge_pages++;
-       h->surplus_huge_pages_node[page_to_nid(page)]++;
+       h->surplus_huge_pages_node[folio_nid(folio)]++;
 
 out_unlock:
        spin_unlock_irq(&hugetlb_lock);
 
-       return page;
+       return &folio->page;
 }
 
 static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
                                     int nid, nodemask_t *nmask)
 {
-       struct page *page;
+       struct folio *folio;
 
        if (hstate_is_gigantic(h))
                return NULL;
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
 
        /* fresh huge pages are frozen */
-       set_page_refcounted(page);
-
+       folio_ref_unfreeze(folio, 1);
        /*
         * We do not account these pages as surplus because they are only
         * temporary and will be released properly on the last reference
         */
-       SetHPageTemporary(page);
+       folio_set_hugetlb_temporary(folio);
 
-       return page;
+       return &folio->page;
 }
 
 /*
@@ -2430,7 +2434,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
                if ((--needed) < 0)
                        break;
                /* Add the page to the hugetlb allocator */
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, page_folio(page));
        }
 free:
        spin_unlock_irq(&hugetlb_lock);
@@ -2737,51 +2741,52 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 }
 
 /*
- * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+ * the old one
  * @h: struct hstate old page belongs to
- * @old_page: Old page to dissolve
+ * @old_folio: Old folio to dissolve
  * @list: List to isolate the page in case we need to
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
-                                       struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+                       struct folio *old_folio, struct list_head *list)
 {
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-       int nid = page_to_nid(old_page);
-       struct page *new_page;
+       int nid = folio_nid(old_folio);
+       struct folio *new_folio;
        int ret = 0;
 
        /*
-        * Before dissolving the page, we need to allocate a new one for the
-        * pool to remain stable.  Here, we allocate the page and 'prep' it
+        * Before dissolving the folio, we need to allocate a new one for the
+        * pool to remain stable.  Here, we allocate the folio and 'prep' it
         * by doing everything but actually updating counters and adding to
         * the pool.  This simplifies and lets us do most of the processing
         * under the lock.
         */
-       new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
-       if (!new_page)
+       new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+       if (!new_folio)
                return -ENOMEM;
-       __prep_new_huge_page(h, new_page);
+       __prep_new_hugetlb_folio(h, new_folio);
 
 retry:
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(old_page)) {
+       if (!folio_test_hugetlb(old_folio)) {
                /*
-                * Freed from under us. Drop new_page too.
+                * Freed from under us. Drop new_folio too.
                 */
                goto free_new;
-       } else if (page_count(old_page)) {
+       } else if (folio_ref_count(old_folio)) {
                /*
-                * Someone has grabbed the page, try to isolate it here.
+                * Someone has grabbed the folio, try to isolate it here.
                 * Fail with -EBUSY if not possible.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               ret = isolate_hugetlb(old_page, list);
+               ret = isolate_hugetlb(&old_folio->page, list);
                spin_lock_irq(&hugetlb_lock);
                goto free_new;
-       } else if (!HPageFreed(old_page)) {
+       } else if (!folio_test_hugetlb_freed(old_folio)) {
                /*
-                * Page's refcount is 0 but it has not been enqueued in the
+                * Folio's refcount is 0 but it has not been enqueued in the
                 * freelist yet. Race window is small, so we can succeed here if
                 * we retry.
                 */
@@ -2790,35 +2795,35 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
                goto retry;
        } else {
                /*
-                * Ok, old_page is still a genuine free hugepage. Remove it from
+                * Ok, old_folio is still a genuine free hugepage. Remove it from
                 * the freelist and decrease the counters. These will be
                 * incremented again when calling __prep_account_new_huge_page()
-                * and enqueue_huge_page() for new_page. The counters will remain
-                * stable since this happens under the lock.
+                * and enqueue_hugetlb_folio() for new_folio. The counters will
+                * remain stable since this happens under the lock.
                 */
-               remove_hugetlb_page(h, old_page, false);
+               remove_hugetlb_folio(h, old_folio, false);
 
                /*
-                * Ref count on new page is already zero as it was dropped
+                * Ref count on new_folio is already zero as it was dropped
                 * earlier.  It can be directly added to the pool free list.
                 */
                __prep_account_new_huge_page(h, nid);
-               enqueue_huge_page(h, new_page);
+               enqueue_hugetlb_folio(h, new_folio);
 
                /*
-                * Pages have been replaced, we can safely free the old one.
+                * Folio has been replaced, we can safely free the old one.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               update_and_free_page(h, old_page, false);
+               update_and_free_hugetlb_folio(h, old_folio, false);
        }
 
        return ret;
 
 free_new:
        spin_unlock_irq(&hugetlb_lock);
-       /* Page has a zero ref count, but needs a ref to be freed */
-       set_page_refcounted(new_page);
-       update_and_free_page(h, new_page, false);
+       /* Folio has a zero ref count, but needs a ref to be freed */
+       folio_ref_unfreeze(new_folio, 1);
+       update_and_free_hugetlb_folio(h, new_folio, false);
 
        return ret;
 }
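
The retry above covers a narrow race: the old folio can have a zero refcount
while not yet sitting on the free list.  A hedged sketch of the
lock-drop-and-retry idiom (the helper name and the lock parameter are
illustrative; in the patch the lock is hugetlb_lock):

    #include <linux/hugetlb.h>
    #include <linux/mm.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    /* Illustrative only: politely wait for a zero-ref hugetlb folio to
     * reach the free list before acting on it. */
    static void wait_for_freed(spinlock_t *lock, struct folio *folio)
    {
            spin_lock_irq(lock);
            while (!folio_ref_count(folio) &&
                   !folio_test_hugetlb_freed(folio)) {
                    spin_unlock_irq(lock);
                    cond_resched();         /* let the freeing path progress */
                    spin_lock_irq(lock);
            }
            spin_unlock_irq(lock);
    }
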
@@ -2826,7 +2831,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
        struct hstate *h;
-       struct page *head;
+       struct folio *folio = page_folio(page);
        int ret = -EBUSY;
 
        /*
@@ -2835,9 +2840,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
         * Return success when racing as if we dissolved the page ourselves.
         */
        spin_lock_irq(&hugetlb_lock);
-       if (PageHuge(page)) {
-               head = compound_head(page);
-               h = page_hstate(head);
+       if (folio_test_hugetlb(folio)) {
+               h = folio_hstate(folio);
        } else {
                spin_unlock_irq(&hugetlb_lock);
                return 0;
@@ -2852,10 +2856,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
        if (hstate_is_gigantic(h))
                return -ENOMEM;
 
-       if (page_count(head) && !isolate_hugetlb(head, list))
+       if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
                ret = 0;
-       else if (!page_count(head))
-               ret = alloc_and_dissolve_huge_page(h, head, list);
+       else if (!folio_ref_count(folio))
+               ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
 
        return ret;
 }
@@ -2866,6 +2870,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
        struct hugepage_subpool *spool = subpool_vma(vma);
        struct hstate *h = hstate_vma(vma);
        struct page *page;
+       struct folio *folio;
        long map_chg, map_commit;
        long gbl_chg;
        int ret, idx;
@@ -2943,6 +2948,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                set_page_refcounted(page);
                /* Fall through */
        }
+       folio = page_folio(page);
        hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
        /* If allocation is not consuming a reservation, also store the
         * hugetlb_cgroup pointer on the page.
@@ -2972,8 +2978,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                rsv_adjust = hugepage_subpool_put_pages(spool, 1);
                hugetlb_acct_memory(h, -rsv_adjust);
                if (deferred_reserve)
-                       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                       pages_per_huge_page(h), page);
+                       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                       pages_per_huge_page(h), folio);
        }
        return page;
 
@@ -3038,17 +3044,18 @@ static void __init gather_bootmem_prealloc(void)
 
        list_for_each_entry(m, &huge_boot_pages, list) {
                struct page *page = virt_to_page(m);
+               struct folio *folio = page_folio(page);
                struct hstate *h = m->hstate;
 
                VM_BUG_ON(!hstate_is_gigantic(h));
-               WARN_ON(page_count(page) != 1);
-               if (prep_compound_gigantic_page(page, huge_page_order(h))) {
-                       WARN_ON(PageReserved(page));
-                       prep_new_huge_page(h, page, page_to_nid(page));
+               WARN_ON(folio_ref_count(folio) != 1);
+               if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
+                       WARN_ON(folio_test_reserved(folio));
+                       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
                        free_huge_page(page); /* add to the hugepage allocator */
                } else {
                        /* VERY unlikely inflated ref count on a tail page */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                }
 
                /*
@@ -3070,14 +3077,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
                        if (!alloc_bootmem_huge_page(h, nid))
                                break;
                } else {
-                       struct page *page;
+                       struct folio *folio;
                        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
-                       page = alloc_fresh_huge_page(h, gfp_mask, nid,
+                       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
                                        &node_states[N_MEMORY], NULL);
-                       if (!page)
+                       if (!folio)
                                break;
-                       free_huge_page(page); /* free it into the hugepage allocator */
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
                }
                cond_resched();
        }
@@ -3222,7 +3229,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
                                goto out;
                        if (PageHighMem(page))
                                continue;
-                       remove_hugetlb_page(h, page, false);
+                       remove_hugetlb_folio(h, page_folio(page), false);
                        list_add(&page->lru, &page_list);
                }
        }
@@ -3427,12 +3434,13 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 {
        int i, nid = page_to_nid(page);
        struct hstate *target_hstate;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
        int rc = 0;
 
        target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
 
-       remove_hugetlb_page_for_demote(h, page, false);
+       remove_hugetlb_folio_for_demote(h, folio, false);
        spin_unlock_irq(&hugetlb_lock);
 
        rc = hugetlb_vmemmap_restore(h, page);
@@ -3440,15 +3448,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
                /* Allocation of vmemmmap failed, we can not demote page */
                spin_lock_irq(&hugetlb_lock);
                set_page_refcounted(page);
-               add_hugetlb_page(h, page, false);
+               add_hugetlb_folio(h, page_folio(page), false);
                return rc;
        }
 
        /*
-        * Use destroy_compound_hugetlb_page_for_demote for all huge page
+        * Use destroy_compound_hugetlb_folio_for_demote for all huge page
         * sizes as it will not ref count pages.
         */
-       destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+       destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
 
        /*
         * Taking target hstate mutex synchronizes with set_max_huge_pages.
@@ -3462,13 +3470,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
        for (i = 0; i < pages_per_huge_page(h);
                                i += pages_per_huge_page(target_hstate)) {
                subpage = nth_page(page, i);
+               folio = page_folio(subpage);
                if (hstate_is_gigantic(target_hstate))
-                       prep_compound_gigantic_page_for_demote(subpage,
+                       prep_compound_gigantic_folio_for_demote(folio,
                                                        target_hstate->order);
                else
                        prep_compound_page(subpage, target_hstate->order);
                set_page_private(subpage, 0);
-               prep_new_huge_page(target_hstate, subpage, nid);
+               prep_new_hugetlb_folio(target_hstate, folio, nid);
                free_huge_page(subpage);
        }
        mutex_unlock(&target_hstate->resize_lock);
@@ -4777,7 +4786,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
        hugepage_add_new_anon_rmap(new_page, vma, addr);
        set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
        hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
-       ClearHPageRestoreReserve(new_page);
        SetHPageMigratable(new_page);
 }
 
@@ -5066,7 +5074,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct page *page;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
-       struct mmu_notifier_range range;
        unsigned long last_addr_mask;
        bool force_flush = false;
 
@@ -5081,13 +5088,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        tlb_change_page_size(tlb, sz);
        tlb_start_vma(tlb, vma);
 
-       /*
-        * If sharing possible, alert mmu notifiers of worst case.
-        */
-       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
-                               end);
-       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
-       mmu_notifier_invalidate_range_start(&range);
        last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
@@ -5117,7 +5117,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                 * unmapped and its refcount is dropped, so just clear pte here.
                 */
                if (unlikely(!pte_present(pte))) {
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
                        /*
                         * If the pte was wr-protected by uffd-wp in any of the
                         * swap forms, meanwhile the caller does not want to
@@ -5129,7 +5128,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                                set_huge_pte_at(mm, address, ptep,
                                                make_pte_marker(PTE_MARKER_UFFD_WP));
                        else
-#endif
                                huge_pte_clear(mm, address, ptep, sz);
                        spin_unlock(ptl);
                        continue;
@@ -5158,13 +5156,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
                if (huge_pte_dirty(pte))
                        set_page_dirty(page);
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
                /* Leave a uffd-wp pte marker if needed */
                if (huge_pte_uffd_wp(pte) &&
                    !(zap_flags & ZAP_FLAG_DROP_MARKER))
                        set_huge_pte_at(mm, address, ptep,
                                        make_pte_marker(PTE_MARKER_UFFD_WP));
-#endif
                hugetlb_count_sub(pages_per_huge_page(h), mm);
                page_remove_rmap(page, vma, true);
 
@@ -5176,7 +5172,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                if (ref_page)
                        break;
        }
-       mmu_notifier_invalidate_range_end(&range);
        tlb_end_vma(tlb, vma);
 
        /*
@@ -5204,6 +5199,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
        hugetlb_vma_lock_write(vma);
        i_mmap_lock_write(vma->vm_file->f_mapping);
 
+       /* mmu notification performed in caller */
        __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
 
        if (zap_flags & ZAP_FLAG_UNMAP) {       /* final unmap */
@@ -5228,10 +5224,18 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                          unsigned long end, struct page *ref_page,
                          zap_flags_t zap_flags)
 {
+       struct mmu_notifier_range range;
        struct mmu_gather tlb;
 
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+                               start, end);
+       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+       mmu_notifier_invalidate_range_start(&range);
        tlb_gather_mmu(&tlb, vma->vm_mm);
+
        __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+       mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
 }
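
Together with the earlier removal from __unmap_hugepage_range(), this hunk
moves the mmu notifier calls into the caller, so the notified range (widened
by adjust_range_if_pmd_sharing_possible() when PMD sharing is possible)
brackets the whole mmu_gather.  A hedged sketch of that bracketing order,
with an illustrative function name and the page-table walk elided:

    #include <linux/mm.h>
    #include <linux/mmu_notifier.h>
    #include <asm/tlb.h>

    /* Illustrative only: notifier start/end must enclose the TLB gather and
     * the walk that clears the entries. */
    static void unmap_with_notify(struct vm_area_struct *vma,
                                  unsigned long start, unsigned long end)
    {
            struct mmu_notifier_range range;
            struct mmu_gather tlb;

            mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
                                    vma->vm_mm, start, end);
            mmu_notifier_invalidate_range_start(&range);
            tlb_gather_mmu(&tlb, vma->vm_mm);

            /* ... walk [start, end) and clear huge PTEs here ... */

            mmu_notifier_invalidate_range_end(&range);
            tlb_finish_mmu(&tlb);
    }
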
 
@@ -5310,9 +5314,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long haddr = address & huge_page_mask(h);
        struct mmu_notifier_range range;
 
-       VM_BUG_ON(unshare && (flags & FOLL_WRITE));
-       VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
-
        /*
         * hugetlb does not support FOLL_FORCE-style write faults that keep the
         * PTE mapped R/O such as maybe_mkwrite() would do.
@@ -5322,8 +5323,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Let's take out MAP_SHARED mappings first. */
        if (vma->vm_flags & VM_MAYSHARE) {
-               if (unlikely(unshare))
-                       return 0;
                set_huge_ptep_writable(vma, haddr, ptep);
                return 0;
        }
@@ -5445,8 +5444,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        spin_lock(ptl);
        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
        if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
-               ClearHPageRestoreReserve(new_page);
-
                /* Break COW or unshare */
                huge_ptep_clear_flush(vma, haddr, ptep);
                mmu_notifier_invalidate_range(mm, range.start, range.end);
@@ -5741,10 +5738,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
        if (!pte_same(huge_ptep_get(ptep), old_pte))
                goto backout;
 
-       if (anon_rmap) {
-               ClearHPageRestoreReserve(page);
+       if (anon_rmap)
                hugepage_add_new_anon_rmap(page, vma, haddr);
-       } else
+       else
                page_dup_file_rmap(page, true);
        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
                                && (vma->vm_flags & VM_SHARED)));
@@ -6131,12 +6127,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
                goto out_release_unlock;
 
-       if (page_in_pagecache) {
+       if (page_in_pagecache)
                page_dup_file_rmap(page, true);
-       } else {
-               ClearHPageRestoreReserve(page);
+       else
                hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
-       }
 
        /*
         * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@ -6201,7 +6195,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
        }
 }
 
-static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
+                                              unsigned int flags, pte_t *pte,
                                               bool *unshare)
 {
        pte_t pteval = huge_ptep_get(pte);
@@ -6213,13 +6208,69 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
                return false;
        if (flags & FOLL_WRITE)
                return true;
-       if (gup_must_unshare(flags, pte_page(pteval))) {
+       if (gup_must_unshare(vma, flags, pte_page(pteval))) {
                *unshare = true;
                return true;
        }
        return false;
 }
 
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+                               unsigned long address, unsigned int flags)
+{
+       struct hstate *h = hstate_vma(vma);
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long haddr = address & huge_page_mask(h);
+       struct page *page = NULL;
+       spinlock_t *ptl;
+       pte_t *pte, entry;
+
+       /*
+        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+        * follow_hugetlb_page().
+        */
+       if (WARN_ON_ONCE(flags & FOLL_PIN))
+               return NULL;
+
+retry:
+       pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+       if (!pte)
+               return NULL;
+
+       ptl = huge_pte_lock(h, mm, pte);
+       entry = huge_ptep_get(pte);
+       if (pte_present(entry)) {
+               page = pte_page(entry) +
+                               ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+               /*
+                * Note that page may be a sub-page, and with vmemmap
+                * optimizations the page struct may be read only.
+                * try_grab_page() will increase the ref count on the
+                * head page, so this will be OK.
+                *
+                * try_grab_page() should always be able to get the page here,
+                * because we hold the ptl lock and have verified pte_present().
+                */
+               if (try_grab_page(page, flags)) {
+                       page = NULL;
+                       goto out;
+               }
+       } else {
+               if (is_hugetlb_entry_migration(entry)) {
+                       spin_unlock(ptl);
+                       __migration_entry_wait_huge(pte, ptl);
+                       goto retry;
+               }
+               /*
+                * hwpoisoned entry is treated as no_page_table in
+                * follow_page_mask().
+                */
+       }
+out:
+       spin_unlock(ptl);
+       return page;
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, unsigned long *nr_pages,
@@ -6286,7 +6337,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * directly from any kind of swap entries.
                 */
                if (absent ||
-                   __follow_hugetlb_must_fault(flags, pte, &unshare)) {
+                   __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
                        vm_fault_t ret;
                        unsigned int fault_flags = 0;
 
@@ -6296,9 +6347,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                fault_flags |= FAULT_FLAG_WRITE;
                        else if (unshare)
                                fault_flags |= FAULT_FLAG_UNSHARE;
-                       if (locked)
+                       if (locked) {
                                fault_flags |= FAULT_FLAG_ALLOW_RETRY |
                                        FAULT_FLAG_KILLABLE;
+                               if (flags & FOLL_INTERRUPTIBLE)
+                                       fault_flags |= FAULT_FLAG_INTERRUPTIBLE;
+                       }
                        if (flags & FOLL_NOWAIT)
                                fault_flags |= FAULT_FLAG_ALLOW_RETRY |
                                        FAULT_FLAG_RETRY_NOWAIT;
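
The new hugetlb_follow_page_mask() above becomes the single hugetlb entry point for follow_page()-style lookups, which is what allows the per-level __weak follow_huge_*() hooks to be deleted in the next hunk. A minimal sketch of how the generic walker is expected to hand hugetlb VMAs over to it; the real dispatch lives in follow_page_mask() in mm/gup.c, not in this file, and the function shown here is illustrative only:

        /* Sketch only: mirrors the expected mm/gup.c dispatch, not the actual code. */
        static struct page *follow_page_sketch(struct vm_area_struct *vma,
                                               unsigned long address,
                                               unsigned int flags)
        {
                if (is_vm_hugetlb_page(vma))
                        /* One call now covers every hugetlb page table level. */
                        return hugetlb_follow_page_mask(vma, address, flags);

                /* ... ordinary pgd/p4d/pud/pmd walk for non-hugetlb VMAs ... */
                return NULL;
        }
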
@@ -7214,123 +7268,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
  * These functions are overwritable if your architecture needs its own
  * behavior.
  */
-struct page * __weak
-follow_huge_addr(struct mm_struct *mm, unsigned long address,
-                             int write)
-{
-       return ERR_PTR(-EINVAL);
-}
-
-struct page * __weak
-follow_huge_pd(struct vm_area_struct *vma,
-              unsigned long address, hugepd_t hpd, int flags, int pdshift)
-{
-       WARN(1, "hugepd follow called with no support for hugepage directory format\n");
-       return NULL;
-}
-
-struct page * __weak
-follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
-{
-       struct hstate *h = hstate_vma(vma);
-       struct mm_struct *mm = vma->vm_mm;
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t *ptep, pte;
-
-       /*
-        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
-        * follow_hugetlb_page().
-        */
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
-
-retry:
-       ptep = huge_pte_offset(mm, address, huge_page_size(h));
-       if (!ptep)
-               return NULL;
-
-       ptl = huge_pte_lock(h, mm, ptep);
-       pte = huge_ptep_get(ptep);
-       if (pte_present(pte)) {
-               page = pte_page(pte) +
-                       ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
-               /*
-                * try_grab_page() should always be able to get the page here,
-                * because: a) we hold the pmd (ptl) lock, and b) we've just
-                * checked that the huge pmd (head) page is present in the
-                * page tables. The ptl prevents the head page and tail pages
-                * from being rearranged in any way. So this page must be
-                * available at this point, unless the page refcount
-                * overflowed:
-                */
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait_huge(ptep, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
-out:
-       spin_unlock(ptl);
-       return page;
-}
-
-struct page * __weak
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-               pud_t *pud, int flags)
-{
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t pte;
-
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
-
-retry:
-       ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
-       if (!pud_huge(*pud))
-               goto out;
-       pte = huge_ptep_get((pte_t *)pud);
-       if (pte_present(pte)) {
-               page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait(mm, (pte_t *)pud, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
-out:
-       spin_unlock(ptl);
-       return page;
-}
-
-struct page * __weak
-follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
-{
-       if (flags & (FOLL_GET | FOLL_PIN))
-               return NULL;
-
-       return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
-}
-
 int isolate_hugetlb(struct page *page, struct list_head *list)
 {
        int ret = 0;
@@ -7349,7 +7286,7 @@ int isolate_hugetlb(struct page *page, struct list_head *list)
        return ret;
 }
 
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
        int ret = 0;
 
@@ -7359,7 +7296,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
                *hugetlb = true;
                if (HPageFreed(page))
                        ret = 0;
-               else if (HPageMigratable(page))
+               else if (HPageMigratable(page) || unpoison)
                        ret = get_page_unless_zero(page);
                else
                        ret = -EBUSY;
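
The new unpoison argument relaxes the refcount rule visible above: a hugetlb page that is in use and no longer HPageMigratable is normally refused with -EBUSY, but presumably the unpoison path still needs a reference on such a page. A hedged caller sketch derived only from the logic in this hunk; the real callers are in mm/memory-failure.c and may differ in detail:

        bool hugetlb = false;
        int ret;

        /*
         * Error-handling mode (unpoison == false): an in-use hugetlb page
         * that is no longer HPageMigratable comes back as -EBUSY.
         */
        ret = get_hwpoison_huge_page(page, &hugetlb, false);

        /*
         * Unpoison mode (unpoison == true): the HPageMigratable check is
         * skipped, so a reference can still be taken on such a page.
         */
        ret = get_hwpoison_huge_page(page, &hugetlb, true);
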
@@ -7368,12 +7305,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
        return ret;
 }
 
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+                               bool *migratable_cleared)
 {
        int ret;
 
        spin_lock_irq(&hugetlb_lock);
-       ret = __get_huge_page_for_hwpoison(pfn, flags);
+       ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
        spin_unlock_irq(&hugetlb_lock);
        return ret;
 }
@@ -7387,15 +7325,15 @@ void putback_active_hugepage(struct page *page)
        put_page(page);
 }
 
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
 {
-       struct hstate *h = page_hstate(oldpage);
+       struct hstate *h = folio_hstate(old_folio);
 
-       hugetlb_cgroup_migrate(oldpage, newpage);
-       set_page_owner_migrate_reason(newpage, reason);
+       hugetlb_cgroup_migrate(old_folio, new_folio);
+       set_page_owner_migrate_reason(&new_folio->page, reason);
 
        /*
-        * transfer temporary state of the new huge page. This is
+        * transfer temporary state of the new hugetlb folio. This is
         * reverse to other transitions because the newpage is going to
         * be final while the old one will be freed so it takes over
         * the temporary status.
@@ -7404,12 +7342,13 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
         * here as well otherwise the global surplus count will not match
         * the per-node's.
         */
-       if (HPageTemporary(newpage)) {
-               int old_nid = page_to_nid(oldpage);
-               int new_nid = page_to_nid(newpage);
+       if (folio_test_hugetlb_temporary(new_folio)) {
+               int old_nid = folio_nid(old_folio);
+               int new_nid = folio_nid(new_folio);
+
+               folio_set_hugetlb_temporary(old_folio);
+               folio_clear_hugetlb_temporary(new_folio);
 
-               SetHPageTemporary(oldpage);
-               ClearHPageTemporary(newpage);
 
                /*
                 * There is no need to transfer the per-node surplus state