diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3d9f4abec17c61e6af64d51ef74612e908371aca..77f36e3681e390dc7f1382cd4dc9bca84b09a611 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE];
 #ifdef CONFIG_CMA
 static struct cma *hugetlb_cma[MAX_NUMNODES];
 static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
-       return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
+       return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
                                1 << order);
 }
 #else
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
        return false;
 }
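
The conversions throughout this patch follow one pattern: a struct page and
its page_*() accessors are replaced by the containing struct folio and the
matching folio_*() helpers.  A minimal sketch of that mapping, assuming an
ordinary kernel context (the function name is illustrative, not part of the
patch):

    #include <linux/mm.h>

    /* Illustrative only: how the page-based lookups in the old code map
     * onto the folio helpers used by the new code. */
    static bool example_folio_accessors(struct page *page)
    {
            struct folio *folio = page_folio(page); /* head page as a folio */

            /* page_to_nid(page) -> folio_nid(folio)       */
            /* page_to_pfn(page) -> folio_pfn(folio)       */
            /* page_count(page)  -> folio_ref_count(folio) */
            return folio_nid(folio) == page_to_nid(page) &&
                   folio_pfn(folio) == page_to_pfn(compound_head(page));
    }
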
@@ -1127,17 +1127,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
        return false;
 }
 
-static void enqueue_huge_page(struct hstate *h, struct page *page)
+static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
        lockdep_assert_held(&hugetlb_lock);
-       VM_BUG_ON_PAGE(page_count(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-       list_move(&page->lru, &h->hugepage_freelists[nid]);
+       list_move(&folio->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;
-       SetHPageFreed(page);
+       folio_set_hugetlb_freed(folio);
 }
 
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1325,76 +1325,76 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
                nr_nodes--)
 
 /* used to demote non-gigantic_huge pages as well */
-static void __destroy_compound_gigantic_page(struct page *page,
+static void __destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order, bool demote)
 {
        int i;
        int nr_pages = 1 << order;
        struct page *p;
 
-       atomic_set(compound_mapcount_ptr(page), 0);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), 0);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
 
        for (i = 1; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
                p->mapping = NULL;
                clear_compound_head(p);
                if (!demote)
                        set_page_refcounted(p);
        }
 
-       set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
-#endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
 }
 
-static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
                                        unsigned int order)
 {
-       __destroy_compound_gigantic_page(page, order, true);
+       __destroy_compound_gigantic_folio(folio, order, true);
 }
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_page(struct page *page,
+static void destroy_compound_gigantic_folio(struct folio *folio,
                                        unsigned int order)
 {
-       __destroy_compound_gigantic_page(page, order, false);
+       __destroy_compound_gigantic_folio(folio, order, false);
 }
 
-static void free_gigantic_page(struct page *page, unsigned int order)
+static void free_gigantic_folio(struct folio *folio, unsigned int order)
 {
        /*
         * If the page isn't allocated using the cma allocator,
         * cma_release() returns false.
         */
 #ifdef CONFIG_CMA
-       if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+       int nid = folio_nid(folio);
+
+       if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
                return;
 #endif
 
-       free_contig_range(page_to_pfn(page), 1 << order);
+       free_contig_range(folio_pfn(folio), 1 << order);
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                int nid, nodemask_t *nodemask)
 {
+       struct page *page;
        unsigned long nr_pages = pages_per_huge_page(h);
        if (nid == NUMA_NO_NODE)
                nid = numa_mem_id();
 
 #ifdef CONFIG_CMA
        {
-               struct page *page;
                int node;
 
                if (hugetlb_cma[nid]) {
                        page = cma_alloc(hugetlb_cma[nid], nr_pages,
                                        huge_page_order(h), true);
                        if (page)
-                               return page;
+                               return page_folio(page);
                }
 
                if (!(gfp_mask & __GFP_THISNODE)) {
@@ -1405,17 +1405,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
                                page = cma_alloc(hugetlb_cma[node], nr_pages,
                                                huge_page_order(h), true);
                                if (page)
-                                       return page;
+                                       return page_folio(page);
                        }
                }
        }
 #endif
 
-       return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+       return page ? page_folio(page) : NULL;
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
 {
        return NULL;
@@ -1423,40 +1424,41 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
                                        int nid, nodemask_t *nodemask)
 {
        return NULL;
 }
-static inline void free_gigantic_page(struct page *page, unsigned int order) { }
-static inline void destroy_compound_gigantic_page(struct page *page,
+static inline void free_gigantic_folio(struct folio *folio,
+                                               unsigned int order) { }
+static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
 #endif
 
 /*
- * Remove hugetlb page from lists, and update dtor so that page appears
+ * Remove hugetlb folio from lists, and update dtor so that the folio appears
  * as just a compound page.
  *
- * A reference is held on the page, except in the case of demote.
+ * A reference is held on the folio, except in the case of demote.
  *
  * Must be called with hugetlb lock held.
  */
-static void __remove_hugetlb_page(struct hstate *h, struct page *page,
+static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus,
                                                        bool demote)
 {
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-       VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
+       VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
 
        lockdep_assert_held(&hugetlb_lock);
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
 
-       list_del(&page->lru);
+       list_del(&folio->lru);
 
-       if (HPageFreed(page)) {
+       if (folio_test_hugetlb_freed(folio)) {
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
        }
@@ -1475,50 +1477,50 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
         *
         * For gigantic pages set the destructor to the null dtor.  This
         * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_page will turn the compound page
-        * into a simple group of pages.  After this the destructor does not
+        * page destroy_compound_gigantic_folio will turn the folio into a
+        * simple group of pages.  After this the destructor does not
         * apply.
         *
         * This handles the case where more than one ref is held when and
-        * after update_and_free_page is called.
+        * after update_and_free_hugetlb_folio is called.
         *
         * In the case of demote we do not ref count the page as it will soon
         * be turned into a page of smaller size.
         */
        if (!demote)
-               set_page_refcounted(page);
+               folio_ref_unfreeze(folio, 1);
        if (hstate_is_gigantic(h))
-               set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
        else
-               set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
 }
 
-static void remove_hugetlb_page(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
 {
-       __remove_hugetlb_page(h, page, adjust_surplus, false);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, false);
 }
 
-static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
                                                        bool adjust_surplus)
 {
-       __remove_hugetlb_page(h, page, adjust_surplus, true);
+       __remove_hugetlb_folio(h, folio, adjust_surplus, true);
 }
 
-static void add_hugetlb_page(struct hstate *h, struct page *page,
+static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
                             bool adjust_surplus)
 {
        int zeroed;
-       int nid = page_to_nid(page);
+       int nid = folio_nid(folio);
 
-       VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page);
+       VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
 
        lockdep_assert_held(&hugetlb_lock);
 
-       INIT_LIST_HEAD(&page->lru);
+       INIT_LIST_HEAD(&folio->lru);
        h->nr_huge_pages++;
        h->nr_huge_pages_node[nid]++;
 
@@ -1527,21 +1529,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
                h->surplus_huge_pages_node[nid]++;
        }
 
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       set_page_private(page, 0);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       folio_change_private(folio, NULL);
        /*
-        * We have to set HPageVmemmapOptimized again as above
-        * set_page_private(page, 0) cleared it.
+        * We have to set hugetlb_vmemmap_optimized again as above
+        * folio_change_private(folio, NULL) cleared it.
         */
-       SetHPageVmemmapOptimized(page);
+       folio_set_hugetlb_vmemmap_optimized(folio);
 
        /*
-        * This page is about to be managed by the hugetlb allocator and
+        * This folio is about to be managed by the hugetlb allocator and
         * should have no users.  Drop our reference, and check for others
         * just in case.
         */
-       zeroed = put_page_testzero(page);
-       if (!zeroed)
+       zeroed = folio_put_testzero(folio);
+       if (unlikely(!zeroed))
                /*
                 * It is VERY unlikely someone else has taken a ref on
                 * the page.  In this case, we simply return as the
@@ -1550,13 +1552,14 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
                 */
                return;
 
-       arch_clear_hugepage_flags(page);
-       enqueue_huge_page(h, page);
+       arch_clear_hugepage_flags(&folio->page);
+       enqueue_hugetlb_folio(h, folio);
 }
 
 static void __update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
@@ -1566,7 +1569,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
         * If we don't know which subpages are hwpoisoned, we can't free
         * the hugepage, so it's leaked intentionally.
         */
-       if (HPageRawHwpUnreliable(page))
+       if (folio_test_hugetlb_raw_hwp_unreliable(folio))
                return;
 
        if (hugetlb_vmemmap_restore(h, page)) {
@@ -1576,7 +1579,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
                 * page and put the page back on the hugetlb free list and treat
                 * as a surplus page.
                 */
-               add_hugetlb_page(h, page, true);
+               add_hugetlb_folio(h, folio, true);
                spin_unlock_irq(&hugetlb_lock);
                return;
        }
@@ -1585,11 +1588,11 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
         * Move PageHWPoison flag from head page to the raw error pages,
         * which makes any healthy subpages reusable.
         */
-       if (unlikely(PageHWPoison(page)))
-               hugetlb_clear_page_hwpoison(page);
+       if (unlikely(folio_test_hwpoison(folio)))
+               hugetlb_clear_page_hwpoison(&folio->page);
 
        for (i = 0; i < pages_per_huge_page(h); i++) {
-               subpage = nth_page(page, i);
+               subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
                                1 << PG_referenced | 1 << PG_dirty |
                                1 << PG_active | 1 << PG_private |
@@ -1598,19 +1601,19 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 
        /*
         * Non-gigantic pages demoted from CMA allocated gigantic pages
-        * need to be given back to CMA in free_gigantic_page.
+        * need to be given back to CMA in free_gigantic_folio.
         */
        if (hstate_is_gigantic(h) ||
-           hugetlb_cma_page(page, huge_page_order(h))) {
-               destroy_compound_gigantic_page(page, huge_page_order(h));
-               free_gigantic_page(page, huge_page_order(h));
+           hugetlb_cma_folio(folio, huge_page_order(h))) {
+               destroy_compound_gigantic_folio(folio, huge_page_order(h));
+               free_gigantic_folio(folio, huge_page_order(h));
        } else {
                __free_pages(page, huge_page_order(h));
        }
 }
 
 /*
- * As update_and_free_page() can be called under any context, so we cannot
+ * As update_and_free_hugetlb_folio() can be called under any context, we cannot
  * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the
  * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate
  * the vmemmap pages.
@@ -1639,8 +1642,9 @@ static void free_hpage_workfn(struct work_struct *work)
                /*
                 * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate()
                 * is going to trigger because a previous call to
-                * remove_hugetlb_page() will set_compound_page_dtor(page,
-                * NULL_COMPOUND_DTOR), so do not use page_hstate() directly.
+                * remove_hugetlb_folio() will call folio_set_compound_dtor
+                * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate()
+                * directly.
                 */
                h = size_to_hstate(page_size(page));
 
@@ -1657,11 +1661,11 @@ static inline void flush_free_hpage_work(struct hstate *h)
                flush_work(&free_hpage_work);
 }
 
-static void update_and_free_page(struct hstate *h, struct page *page,
+static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio,
                                 bool atomic)
 {
-       if (!HPageVmemmapOptimized(page) || !atomic) {
-               __update_and_free_page(h, page);
+       if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) {
+               __update_and_free_page(h, &folio->page);
                return;
        }
 
@@ -1672,16 +1676,18 @@ static void update_and_free_page(struct hstate *h, struct page *page,
         * empty. Otherwise, schedule_work() had been called but the workfn
         * hasn't retrieved the list yet.
         */
-       if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
+       if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
                schedule_work(&free_hpage_work);
 }
 
 static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
 {
        struct page *page, *t_page;
+       struct folio *folio;
 
        list_for_each_entry_safe(page, t_page, list, lru) {
-               update_and_free_page(h, page, false);
+               folio = page_folio(page);
+               update_and_free_hugetlb_folio(h, folio, false);
                cond_resched();
        }
 }
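
The deferral above exists because update_and_free_hugetlb_folio() may run in
atomic context while restoring vmemmap pages wants GFP_KERNEL, so the actual
freeing is punted to process context; in the patch the llist_node storage is
overlaid on folio->mapping.  A hedged sketch of the underlying
llist-plus-workqueue idiom (the names below are illustrative, not taken from
the kernel):

    #include <linux/llist.h>
    #include <linux/workqueue.h>

    static LLIST_HEAD(deferred_list);

    static void deferred_workfn(struct work_struct *work)
    {
            /* Grab the whole batch atomically, then process it unlocked. */
            struct llist_node *node = llist_del_all(&deferred_list);

            while (node) {
                    struct llist_node *next = node->next;

                    /* ... free or otherwise process the item here ... */
                    node = next;
            }
    }
    static DECLARE_WORK(deferred_work, deferred_workfn);

    static void defer_item(struct llist_node *item)
    {
            /* llist_add() returns true only when the list was empty, so the
             * work is scheduled once per batch. */
            if (llist_add(item, &deferred_list))
                    schedule_work(&deferred_work);
    }
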
@@ -1703,21 +1709,22 @@ void free_huge_page(struct page *page)
         * Can't pass hstate in here because it is called from the
         * compound page destructor.
         */
-       struct hstate *h = page_hstate(page);
-       int nid = page_to_nid(page);
-       struct hugepage_subpool *spool = hugetlb_page_subpool(page);
+       struct folio *folio = page_folio(page);
+       struct hstate *h = folio_hstate(folio);
+       int nid = folio_nid(folio);
+       struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);
        bool restore_reserve;
        unsigned long flags;
 
-       VM_BUG_ON_PAGE(page_count(page), page);
-       VM_BUG_ON_PAGE(page_mapcount(page), page);
+       VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+       VM_BUG_ON_FOLIO(folio_mapcount(folio), folio);
 
-       hugetlb_set_page_subpool(page, NULL);
-       if (PageAnon(page))
-               __ClearPageAnonExclusive(page);
-       page->mapping = NULL;
-       restore_reserve = HPageRestoreReserve(page);
-       ClearHPageRestoreReserve(page);
+       hugetlb_set_folio_subpool(folio, NULL);
+       if (folio_test_anon(folio))
+               __ClearPageAnonExclusive(&folio->page);
+       folio->mapping = NULL;
+       restore_reserve = folio_test_hugetlb_restore_reserve(folio);
+       folio_clear_hugetlb_restore_reserve(folio);
 
        /*
         * If HPageRestoreReserve was set on page, page allocation consumed a
@@ -1739,26 +1746,26 @@ void free_huge_page(struct page *page)
        }
 
        spin_lock_irqsave(&hugetlb_lock, flags);
-       ClearHPageMigratable(page);
-       hugetlb_cgroup_uncharge_page(hstate_index(h),
-                                    pages_per_huge_page(h), page);
-       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                         pages_per_huge_page(h), page);
+       folio_clear_hugetlb_migratable(folio);
+       hugetlb_cgroup_uncharge_folio(hstate_index(h),
+                                    pages_per_huge_page(h), folio);
+       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                         pages_per_huge_page(h), folio);
        if (restore_reserve)
                h->resv_huge_pages++;
 
-       if (HPageTemporary(page)) {
-               remove_hugetlb_page(h, page, false);
+       if (folio_test_hugetlb_temporary(folio)) {
+               remove_hugetlb_folio(h, folio, false);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
-               remove_hugetlb_page(h, page, true);
+               remove_hugetlb_folio(h, folio, true);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
-               update_and_free_page(h, page, true);
+               update_and_free_hugetlb_folio(h, folio, true);
        } else {
                arch_clear_hugepage_flags(page);
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, folio);
                spin_unlock_irqrestore(&hugetlb_lock, flags);
        }
 }
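
free_huge_page() is rarely called directly: it is installed as the folio's
compound destructor (HUGETLB_PAGE_DTOR), so it runs when the last reference
is dropped.  A minimal caller-side sketch, assuming a kernel context:

    #include <linux/mm.h>

    /* Illustrative only: the final folio_put() on a hugetlb folio invokes
     * the compound destructor, i.e. free_huge_page() above. */
    static void drop_hugetlb_ref(struct folio *folio)
    {
            folio_put(folio);
    }
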
@@ -1773,37 +1780,37 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
        h->nr_huge_pages_node[nid]++;
 }
 
-static void __prep_new_huge_page(struct hstate *h, struct page *page)
+static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-       hugetlb_vmemmap_optimize(h, page);
-       INIT_LIST_HEAD(&page->lru);
-       set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-       hugetlb_set_page_subpool(page, NULL);
-       set_hugetlb_cgroup(page, NULL);
-       set_hugetlb_cgroup_rsvd(page, NULL);
+       hugetlb_vmemmap_optimize(h, &folio->page);
+       INIT_LIST_HEAD(&folio->lru);
+       folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+       hugetlb_set_folio_subpool(folio, NULL);
+       set_hugetlb_cgroup(folio, NULL);
+       set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
 {
-       __prep_new_huge_page(h, page);
+       __prep_new_hugetlb_folio(h, folio);
        spin_lock_irq(&hugetlb_lock);
        __prep_account_new_huge_page(h, nid);
        spin_unlock_irq(&hugetlb_lock);
 }
 
-static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
-                                                               bool demote)
+static bool __prep_compound_gigantic_folio(struct folio *folio,
+                                       unsigned int order, bool demote)
 {
        int i, j;
        int nr_pages = 1 << order;
        struct page *p;
 
-       /* we rely on prep_new_huge_page to set the destructor */
-       set_compound_order(page, order);
-       __ClearPageReserved(page);
-       __SetPageHead(page);
+       __folio_clear_reserved(folio);
+       __folio_set_head(folio);
+       /* we rely on prep_new_hugetlb_folio to set the destructor */
+       folio_set_compound_order(folio, order);
        for (i = 0; i < nr_pages; i++) {
-               p = nth_page(page, i);
+               p = folio_page(folio, i);
 
                /*
                 * For gigantic hugepages allocated through bootmem at
@@ -1845,42 +1852,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
                        VM_BUG_ON_PAGE(page_count(p), p);
                }
                if (i != 0)
-                       set_compound_head(p, page);
+                       set_compound_head(p, &folio->page);
        }
-       atomic_set(compound_mapcount_ptr(page), -1);
-       atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(folio_mapcount_ptr(folio), -1);
+       atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+       atomic_set(folio_pincount_ptr(folio), 0);
        return true;
 
 out_error:
        /* undo page modifications made above */
        for (j = 0; j < i; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                if (j != 0)
                        clear_compound_head(p);
                set_page_refcounted(p);
        }
        /* need to clear PG_reserved on remaining tail pages  */
        for (; j < nr_pages; j++) {
-               p = nth_page(page, j);
+               p = folio_page(folio, j);
                __ClearPageReserved(p);
        }
-       set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-       page[1].compound_nr = 0;
-#endif
-       __ClearPageHead(page);
+       folio_set_compound_order(folio, 0);
+       __folio_clear_head(folio);
        return false;
 }
 
-static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
+static bool prep_compound_gigantic_folio(struct folio *folio,
+                                                       unsigned int order)
 {
-       return __prep_compound_gigantic_page(page, order, false);
+       return __prep_compound_gigantic_folio(folio, order, false);
 }
 
-static bool prep_compound_gigantic_page_for_demote(struct page *page,
+static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
                                                        unsigned int order)
 {
-       return __prep_compound_gigantic_page(page, order, true);
+       return __prep_compound_gigantic_folio(folio, order, true);
 }
 
 /*
@@ -1945,7 +1951,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
        return (index << compound_order(page_head)) + compound_idx;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
+static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
 {
@@ -1983,11 +1989,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
                page = NULL;
        }
 
-       if (page)
-               __count_vm_event(HTLB_BUDDY_PGALLOC);
-       else
-               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
        /*
         * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
         * indicates an overall state change.  Clear bit so that we resume
@@ -2004,7 +2005,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
        if (node_alloc_noretry && !page && alloc_try_hard)
                node_set(nid, *node_alloc_noretry);
 
-       return page;
+       if (!page) {
+               __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+               return NULL;
+       }
+
+       __count_vm_event(HTLB_BUDDY_PGALLOC);
+       return page_folio(page);
 }
 
 /*
@@ -2014,29 +2021,28 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
  * Note that returned page is 'frozen':  ref count of head page and all tail
  * pages is zero.
  */
-static struct page *alloc_fresh_huge_page(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
                gfp_t gfp_mask, int nid, nodemask_t *nmask,
                nodemask_t *node_alloc_noretry)
 {
-       struct page *page;
+       struct folio *folio;
        bool retry = false;
 
 retry:
        if (hstate_is_gigantic(h))
-               page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+               folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
        else
-               page = alloc_buddy_huge_page(h, gfp_mask,
+               folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
                                nid, nmask, node_alloc_noretry);
-       if (!page)
+       if (!folio)
                return NULL;
-
        if (hstate_is_gigantic(h)) {
-               if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
+               if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
                        /*
                         * Rare failure to convert pages to compound page.
                         * Free pages and try again - ONCE!
                         */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                        if (!retry) {
                                retry = true;
                                goto retry;
@@ -2044,9 +2050,9 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
                        return NULL;
                }
        }
-       prep_new_huge_page(h, page, page_to_nid(page));
+       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 
-       return page;
+       return folio;
 }
 
 /*
@@ -2056,23 +2062,20 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
 static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
                                nodemask_t *node_alloc_noretry)
 {
-       struct page *page;
+       struct folio *folio;
        int nr_nodes, node;
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
        for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-               page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
-                                               node_alloc_noretry);
-               if (page)
-                       break;
+               folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+                                       nodes_allowed, node_alloc_noretry);
+               if (folio) {
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
+                       return 1;
+               }
        }
 
-       if (!page)
-               return 0;
-
-       free_huge_page(page); /* free it into the hugepage allocator */
-
-       return 1;
+       return 0;
 }
 
 /*
@@ -2088,6 +2091,7 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 {
        int nr_nodes, node;
        struct page *page = NULL;
+       struct folio *folio;
 
        lockdep_assert_held(&hugetlb_lock);
        for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
@@ -2099,7 +2103,8 @@ static struct page *remove_pool_huge_page(struct hstate *h,
                    !list_empty(&h->hugepage_freelists[node])) {
                        page = list_entry(h->hugepage_freelists[node].next,
                                          struct page, lru);
-                       remove_hugetlb_page(h, page, acct_surplus);
+                       folio = page_folio(page);
+                       remove_hugetlb_folio(h, folio, acct_surplus);
                        break;
                }
        }
@@ -2124,21 +2129,21 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 int dissolve_free_huge_page(struct page *page)
 {
        int rc = -EBUSY;
+       struct folio *folio = page_folio(page);
 
 retry:
        /* Not to disrupt normal path by vainly holding hugetlb_lock */
-       if (!PageHuge(page))
+       if (!folio_test_hugetlb(folio))
                return 0;
 
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(page)) {
+       if (!folio_test_hugetlb(folio)) {
                rc = 0;
                goto out;
        }
 
-       if (!page_count(page)) {
-               struct page *head = compound_head(page);
-               struct hstate *h = page_hstate(head);
+       if (!folio_ref_count(folio)) {
+               struct hstate *h = folio_hstate(folio);
                if (!available_huge_pages(h))
                        goto out;
 
@@ -2146,7 +2151,7 @@ int dissolve_free_huge_page(struct page *page)
                 * We should make sure that the page is already on the free list
                 * when it is dissolved.
                 */
-               if (unlikely(!HPageFreed(head))) {
+               if (unlikely(!folio_test_hugetlb_freed(folio))) {
                        spin_unlock_irq(&hugetlb_lock);
                        cond_resched();
 
@@ -2161,24 +2166,24 @@ int dissolve_free_huge_page(struct page *page)
                        goto retry;
                }
 
-               remove_hugetlb_page(h, head, false);
+               remove_hugetlb_folio(h, folio, false);
                h->max_huge_pages--;
                spin_unlock_irq(&hugetlb_lock);
 
                /*
-                * Normally update_and_free_page will allocate required vmemmmap
-                * before freeing the page.  update_and_free_page will fail to
+                * Normally update_and_free_hugetlb_folio will allocate required vmemmap
+                * before freeing the page.  update_and_free_hugetlb_folio will fail to
                 * free the page if it can not allocate required vmemmap.  We
                 * need to adjust max_huge_pages if the page is not freed.
                 * Attempt to allocate vmemmap here so that we can take
                 * appropriate action on failure.
                 */
-               rc = hugetlb_vmemmap_restore(h, head);
+               rc = hugetlb_vmemmap_restore(h, &folio->page);
                if (!rc) {
-                       update_and_free_page(h, head, false);
+                       update_and_free_hugetlb_folio(h, folio, false);
                } else {
                        spin_lock_irq(&hugetlb_lock);
-                       add_hugetlb_page(h, head, false);
+                       add_hugetlb_folio(h, folio, false);
                        h->max_huge_pages++;
                        spin_unlock_irq(&hugetlb_lock);
                }
@@ -2229,7 +2234,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                                                int nid, nodemask_t *nmask)
 {
-       struct page *page = NULL;
+       struct folio *folio = NULL;
 
        if (hstate_is_gigantic(h))
                return NULL;
@@ -2239,8 +2244,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                goto out_unlock;
        spin_unlock_irq(&hugetlb_lock);
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
 
        spin_lock_irq(&hugetlb_lock);
@@ -2252,43 +2257,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         * codeflow
         */
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-               SetHPageTemporary(page);
+               folio_set_hugetlb_temporary(folio);
                spin_unlock_irq(&hugetlb_lock);
-               free_huge_page(page);
+               free_huge_page(&folio->page);
                return NULL;
        }
 
        h->surplus_huge_pages++;
-       h->surplus_huge_pages_node[page_to_nid(page)]++;
+       h->surplus_huge_pages_node[folio_nid(folio)]++;
 
 out_unlock:
        spin_unlock_irq(&hugetlb_lock);
 
-       return page;
+       return &folio->page;
 }
 
 static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
                                     int nid, nodemask_t *nmask)
 {
-       struct page *page;
+       struct folio *folio;
 
        if (hstate_is_gigantic(h))
                return NULL;
 
-       page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-       if (!page)
+       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+       if (!folio)
                return NULL;
 
        /* fresh huge pages are frozen */
-       set_page_refcounted(page);
-
+       folio_ref_unfreeze(folio, 1);
        /*
         * We do not account these pages as surplus because they are only
         * temporary and will be released properly on the last reference
         */
-       SetHPageTemporary(page);
+       folio_set_hugetlb_temporary(folio);
 
-       return page;
+       return &folio->page;
 }
 
 /*
@@ -2430,7 +2434,7 @@ static int gather_surplus_pages(struct hstate *h, long delta)
                if ((--needed) < 0)
                        break;
                /* Add the page to the hugetlb allocator */
-               enqueue_huge_page(h, page);
+               enqueue_hugetlb_folio(h, page_folio(page));
        }
 free:
        spin_unlock_irq(&hugetlb_lock);
@@ -2737,51 +2741,52 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 }
 
 /*
- * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+ * the old one
  * @h: struct hstate old page belongs to
- * @old_page: Old page to dissolve
+ * @old_folio: Old folio to dissolve
  * @list: List to isolate the page in case we need to
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
-                                       struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+                       struct folio *old_folio, struct list_head *list)
 {
        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-       int nid = page_to_nid(old_page);
-       struct page *new_page;
+       int nid = folio_nid(old_folio);
+       struct folio *new_folio;
        int ret = 0;
 
        /*
-        * Before dissolving the page, we need to allocate a new one for the
-        * pool to remain stable.  Here, we allocate the page and 'prep' it
+        * Before dissolving the folio, we need to allocate a new one for the
+        * pool to remain stable.  Here, we allocate the folio and 'prep' it
         * by doing everything but actually updating counters and adding to
         * the pool.  This simplifies and lets us do most of the processing
         * under the lock.
         */
-       new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
-       if (!new_page)
+       new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+       if (!new_folio)
                return -ENOMEM;
-       __prep_new_huge_page(h, new_page);
+       __prep_new_hugetlb_folio(h, new_folio);
 
 retry:
        spin_lock_irq(&hugetlb_lock);
-       if (!PageHuge(old_page)) {
+       if (!folio_test_hugetlb(old_folio)) {
                /*
-                * Freed from under us. Drop new_page too.
+                * Freed from under us. Drop new_folio too.
                 */
                goto free_new;
-       } else if (page_count(old_page)) {
+       } else if (folio_ref_count(old_folio)) {
                /*
-                * Someone has grabbed the page, try to isolate it here.
+                * Someone has grabbed the folio, try to isolate it here.
                 * Fail with -EBUSY if not possible.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               ret = isolate_hugetlb(old_page, list);
+               ret = isolate_hugetlb(&old_folio->page, list);
                spin_lock_irq(&hugetlb_lock);
                goto free_new;
-       } else if (!HPageFreed(old_page)) {
+       } else if (!folio_test_hugetlb_freed(old_folio)) {
                /*
-                * Page's refcount is 0 but it has not been enqueued in the
+                * Folio's refcount is 0 but it has not been enqueued in the
                 * freelist yet. Race window is small, so we can succeed here if
                 * we retry.
                 */
@@ -2790,35 +2795,35 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
                goto retry;
        } else {
                /*
-                * Ok, old_page is still a genuine free hugepage. Remove it from
+                * Ok, old_folio is still a genuine free hugepage. Remove it from
                 * the freelist and decrease the counters. These will be
                 * incremented again when calling __prep_account_new_huge_page()
-                * and enqueue_huge_page() for new_page. The counters will remain
-                * stable since this happens under the lock.
+                * and enqueue_hugetlb_folio() for new_folio. The counters will
+                * remain stable since this happens under the lock.
                 */
-               remove_hugetlb_page(h, old_page, false);
+               remove_hugetlb_folio(h, old_folio, false);
 
                /*
-                * Ref count on new page is already zero as it was dropped
+                * Ref count on new_folio is already zero as it was dropped
                 * earlier.  It can be directly added to the pool free list.
                 */
                __prep_account_new_huge_page(h, nid);
-               enqueue_huge_page(h, new_page);
+               enqueue_hugetlb_folio(h, new_folio);
 
                /*
-                * Pages have been replaced, we can safely free the old one.
+                * Folio has been replaced, we can safely free the old one.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               update_and_free_page(h, old_page, false);
+               update_and_free_hugetlb_folio(h, old_folio, false);
        }
 
        return ret;
 
 free_new:
        spin_unlock_irq(&hugetlb_lock);
-       /* Page has a zero ref count, but needs a ref to be freed */
-       set_page_refcounted(new_page);
-       update_and_free_page(h, new_page, false);
+       /* Folio has a zero ref count, but needs a ref to be freed */
+       folio_ref_unfreeze(new_folio, 1);
+       update_and_free_hugetlb_folio(h, new_folio, false);
 
        return ret;
 }
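
The retry above covers a narrow race: the old folio can have a zero refcount
while not yet sitting on the free list.  A hedged sketch of the
lock-drop-and-retry idiom (the helper name and the lock parameter are
illustrative; in the patch the lock is hugetlb_lock):

    #include <linux/hugetlb.h>
    #include <linux/mm.h>
    #include <linux/sched.h>
    #include <linux/spinlock.h>

    /* Illustrative only: politely wait for a zero-ref hugetlb folio to
     * reach the free list before acting on it. */
    static void wait_for_freed(spinlock_t *lock, struct folio *folio)
    {
            spin_lock_irq(lock);
            while (!folio_ref_count(folio) &&
                   !folio_test_hugetlb_freed(folio)) {
                    spin_unlock_irq(lock);
                    cond_resched();         /* let the freeing path progress */
                    spin_lock_irq(lock);
            }
            spin_unlock_irq(lock);
    }
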
@@ -2826,7 +2831,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
        struct hstate *h;
-       struct page *head;
+       struct folio *folio = page_folio(page);
        int ret = -EBUSY;
 
        /*
@@ -2835,9 +2840,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
         * Return success when racing as if we dissolved the page ourselves.
         */
        spin_lock_irq(&hugetlb_lock);
-       if (PageHuge(page)) {
-               head = compound_head(page);
-               h = page_hstate(head);
+       if (folio_test_hugetlb(folio)) {
+               h = folio_hstate(folio);
        } else {
                spin_unlock_irq(&hugetlb_lock);
                return 0;
@@ -2852,10 +2856,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
        if (hstate_is_gigantic(h))
                return -ENOMEM;
 
-       if (page_count(head) && !isolate_hugetlb(head, list))
+       if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
                ret = 0;
-       else if (!page_count(head))
-               ret = alloc_and_dissolve_huge_page(h, head, list);
+       else if (!folio_ref_count(folio))
+               ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
 
        return ret;
 }
@@ -2866,6 +2870,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
        struct hugepage_subpool *spool = subpool_vma(vma);
        struct hstate *h = hstate_vma(vma);
        struct page *page;
+       struct folio *folio;
        long map_chg, map_commit;
        long gbl_chg;
        int ret, idx;
@@ -2943,6 +2948,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                set_page_refcounted(page);
                /* Fall through */
        }
+       folio = page_folio(page);
        hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
        /* If allocation is not consuming a reservation, also store the
         * hugetlb_cgroup pointer on the page.
@@ -2972,8 +2978,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                rsv_adjust = hugepage_subpool_put_pages(spool, 1);
                hugetlb_acct_memory(h, -rsv_adjust);
                if (deferred_reserve)
-                       hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-                                       pages_per_huge_page(h), page);
+                       hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+                                       pages_per_huge_page(h), folio);
        }
        return page;
 
@@ -3038,17 +3044,18 @@ static void __init gather_bootmem_prealloc(void)
 
        list_for_each_entry(m, &huge_boot_pages, list) {
                struct page *page = virt_to_page(m);
+               struct folio *folio = page_folio(page);
                struct hstate *h = m->hstate;
 
                VM_BUG_ON(!hstate_is_gigantic(h));
-               WARN_ON(page_count(page) != 1);
-               if (prep_compound_gigantic_page(page, huge_page_order(h))) {
-                       WARN_ON(PageReserved(page));
-                       prep_new_huge_page(h, page, page_to_nid(page));
+               WARN_ON(folio_ref_count(folio) != 1);
+               if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
+                       WARN_ON(folio_test_reserved(folio));
+                       prep_new_hugetlb_folio(h, folio, folio_nid(folio));
                        free_huge_page(page); /* add to the hugepage allocator */
                } else {
                        /* VERY unlikely inflated ref count on a tail page */
-                       free_gigantic_page(page, huge_page_order(h));
+                       free_gigantic_folio(folio, huge_page_order(h));
                }
 
                /*
@@ -3070,14 +3077,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
                        if (!alloc_bootmem_huge_page(h, nid))
                                break;
                } else {
-                       struct page *page;
+                       struct folio *folio;
                        gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
-                       page = alloc_fresh_huge_page(h, gfp_mask, nid,
+                       folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
                                        &node_states[N_MEMORY], NULL);
-                       if (!page)
+                       if (!folio)
                                break;
-                       free_huge_page(page); /* free it into the hugepage allocator */
+                       free_huge_page(&folio->page); /* free it into the hugepage allocator */
                }
                cond_resched();
        }
@@ -3222,7 +3229,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
                                goto out;
                        if (PageHighMem(page))
                                continue;
-                       remove_hugetlb_page(h, page, false);
+                       remove_hugetlb_folio(h, page_folio(page), false);
                        list_add(&page->lru, &page_list);
                }
        }
@@ -3427,12 +3434,13 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 {
        int i, nid = page_to_nid(page);
        struct hstate *target_hstate;
+       struct folio *folio = page_folio(page);
        struct page *subpage;
        int rc = 0;
 
        target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
 
-       remove_hugetlb_page_for_demote(h, page, false);
+       remove_hugetlb_folio_for_demote(h, folio, false);
        spin_unlock_irq(&hugetlb_lock);
 
        rc = hugetlb_vmemmap_restore(h, page);
@@ -3440,15 +3448,15 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
                /* Allocation of vmemmmap failed, we can not demote page */
                spin_lock_irq(&hugetlb_lock);
                set_page_refcounted(page);
-               add_hugetlb_page(h, page, false);
+               add_hugetlb_folio(h, page_folio(page), false);
                return rc;
        }
 
        /*
-        * Use destroy_compound_hugetlb_page_for_demote for all huge page
+        * Use destroy_compound_hugetlb_folio_for_demote for all huge page
         * sizes as it will not ref count pages.
         */
-       destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+       destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
 
        /*
         * Taking target hstate mutex synchronizes with set_max_huge_pages.
@@ -3462,13 +3470,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
        for (i = 0; i < pages_per_huge_page(h);
                                i += pages_per_huge_page(target_hstate)) {
                subpage = nth_page(page, i);
+               folio = page_folio(subpage);
                if (hstate_is_gigantic(target_hstate))
-                       prep_compound_gigantic_page_for_demote(subpage,
+                       prep_compound_gigantic_folio_for_demote(folio,
                                                        target_hstate->order);
                else
                        prep_compound_page(subpage, target_hstate->order);
                set_page_private(subpage, 0);
-               prep_new_huge_page(target_hstate, subpage, nid);
+               prep_new_hugetlb_folio(target_hstate, folio, nid);
                free_huge_page(subpage);
        }
        mutex_unlock(&target_hstate->resize_lock);
@@ -4777,7 +4786,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
        hugepage_add_new_anon_rmap(new_page, vma, addr);
        set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
        hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
-       ClearHPageRestoreReserve(new_page);
        SetHPageMigratable(new_page);
 }
 
@@ -5066,7 +5074,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct page *page;
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
-       struct mmu_notifier_range range;
        unsigned long last_addr_mask;
        bool force_flush = false;
 
@@ -5081,13 +5088,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        tlb_change_page_size(tlb, sz);
        tlb_start_vma(tlb, vma);
 
-       /*
-        * If sharing possible, alert mmu notifiers of worst case.
-        */
-       mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
-                               end);
-       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
-       mmu_notifier_invalidate_range_start(&range);
        last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
@@ -5117,7 +5117,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                 * unmapped and its refcount is dropped, so just clear pte here.
                 */
                if (unlikely(!pte_present(pte))) {
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
                        /*
                         * If the pte was wr-protected by uffd-wp in any of the
                         * swap forms, meanwhile the caller does not want to
@@ -5129,7 +5128,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                                set_huge_pte_at(mm, address, ptep,
                                                make_pte_marker(PTE_MARKER_UFFD_WP));
                        else
-#endif
                                huge_pte_clear(mm, address, ptep, sz);
                        spin_unlock(ptl);
                        continue;
@@ -5158,13 +5156,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
                if (huge_pte_dirty(pte))
                        set_page_dirty(page);
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
                /* Leave a uffd-wp pte marker if needed */
                if (huge_pte_uffd_wp(pte) &&
                    !(zap_flags & ZAP_FLAG_DROP_MARKER))
                        set_huge_pte_at(mm, address, ptep,
                                        make_pte_marker(PTE_MARKER_UFFD_WP));
-#endif
                hugetlb_count_sub(pages_per_huge_page(h), mm);
                page_remove_rmap(page, vma, true);
 
@@ -5176,7 +5172,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                if (ref_page)
                        break;
        }
-       mmu_notifier_invalidate_range_end(&range);
        tlb_end_vma(tlb, vma);
 
        /*
@@ -5204,6 +5199,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
        hugetlb_vma_lock_write(vma);
        i_mmap_lock_write(vma->vm_file->f_mapping);
 
+       /* mmu notification performed in caller */
        __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
 
        if (zap_flags & ZAP_FLAG_UNMAP) {       /* final unmap */
@@ -5228,10 +5224,18 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
                          unsigned long end, struct page *ref_page,
                          zap_flags_t zap_flags)
 {
+       struct mmu_notifier_range range;
        struct mmu_gather tlb;
 
+       mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+                               start, end);
+       adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+       mmu_notifier_invalidate_range_start(&range);
        tlb_gather_mmu(&tlb, vma->vm_mm);
+
        __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+       mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb);
 }
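
Together with the earlier removal from __unmap_hugepage_range(), this hunk
moves the mmu notifier calls into the caller, so the notified range (widened
by adjust_range_if_pmd_sharing_possible() when PMD sharing is possible)
brackets the whole mmu_gather.  A hedged sketch of that bracketing order,
with an illustrative function name and the page-table walk elided:

    #include <linux/mm.h>
    #include <linux/mmu_notifier.h>
    #include <asm/tlb.h>

    /* Illustrative only: notifier start/end must enclose the TLB gather and
     * the walk that clears the entries. */
    static void unmap_with_notify(struct vm_area_struct *vma,
                                  unsigned long start, unsigned long end)
    {
            struct mmu_notifier_range range;
            struct mmu_gather tlb;

            mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma,
                                    vma->vm_mm, start, end);
            mmu_notifier_invalidate_range_start(&range);
            tlb_gather_mmu(&tlb, vma->vm_mm);

            /* ... walk [start, end) and clear huge PTEs here ... */

            mmu_notifier_invalidate_range_end(&range);
            tlb_finish_mmu(&tlb);
    }
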
 
@@ -5310,9 +5314,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long haddr = address & huge_page_mask(h);
        struct mmu_notifier_range range;
 
-       VM_BUG_ON(unshare && (flags & FOLL_WRITE));
-       VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
-
        /*
         * hugetlb does not support FOLL_FORCE-style write faults that keep the
         * PTE mapped R/O such as maybe_mkwrite() would do.
@@ -5322,8 +5323,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 
        /* Let's take out MAP_SHARED mappings first. */
        if (vma->vm_flags & VM_MAYSHARE) {
-               if (unlikely(unshare))
-                       return 0;
                set_huge_ptep_writable(vma, haddr, ptep);
                return 0;
        }
@@ -5445,8 +5444,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
        spin_lock(ptl);
        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
        if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
-               ClearHPageRestoreReserve(new_page);
-
                /* Break COW or unshare */
                huge_ptep_clear_flush(vma, haddr, ptep);
                mmu_notifier_invalidate_range(mm, range.start, range.end);
@@ -5741,10 +5738,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
        if (!pte_same(huge_ptep_get(ptep), old_pte))
                goto backout;
 
-       if (anon_rmap) {
-               ClearHPageRestoreReserve(page);
+       if (anon_rmap)
                hugepage_add_new_anon_rmap(page, vma, haddr);
-       } else
+       else
                page_dup_file_rmap(page, true);
        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
                                && (vma->vm_flags & VM_SHARED)));
@@ -6131,12 +6127,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
                goto out_release_unlock;
 
-       if (page_in_pagecache) {
+       if (page_in_pagecache)
                page_dup_file_rmap(page, true);
-       } else {
-               ClearHPageRestoreReserve(page);
+       else
                hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
-       }
 
        /*
         * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@ -6201,7 +6195,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
        }
 }
 
-static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
+                                              unsigned int flags, pte_t *pte,
                                               bool *unshare)
 {
        pte_t pteval = huge_ptep_get(pte);
@@ -6213,13 +6208,69 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
                return false;
        if (flags & FOLL_WRITE)
                return true;
-       if (gup_must_unshare(flags, pte_page(pteval))) {
+       if (gup_must_unshare(vma, flags, pte_page(pteval))) {
                *unshare = true;
                return true;
        }
        return false;
 }
 
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+                               unsigned long address, unsigned int flags)
+{
+       struct hstate *h = hstate_vma(vma);
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long haddr = address & huge_page_mask(h);
+       struct page *page = NULL;
+       spinlock_t *ptl;
+       pte_t *pte, entry;
+
+       /*
+        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+        * follow_hugetlb_page().
+        */
+       if (WARN_ON_ONCE(flags & FOLL_PIN))
+               return NULL;
+
+retry:
+       pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+       if (!pte)
+               return NULL;
+
+       ptl = huge_pte_lock(h, mm, pte);
+       entry = huge_ptep_get(pte);
+       if (pte_present(entry)) {
+               page = pte_page(entry) +
+                               ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+               /*
+                * Note that page may be a sub-page, and with vmemmap
+                * optimizations the page struct may be read only.
+                * try_grab_page() will increase the ref count on the
+                * head page, so this will be OK.
+                *
+                * try_grab_page() should always be able to get the page here,
+                * because we hold the ptl lock and have verified pte_present().
+                */
+               if (try_grab_page(page, flags)) {
+                       page = NULL;
+                       goto out;
+               }
+       } else {
+               if (is_hugetlb_entry_migration(entry)) {
+                       spin_unlock(ptl);
+                       __migration_entry_wait_huge(pte, ptl);
+                       goto retry;
+               }
+               /*
+                * hwpoisoned entry is treated as no_page_table in
+                * follow_page_mask().
+                */
+       }
+out:
+       spin_unlock(ptl);
+       return page;
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, unsigned long *nr_pages,
@@ -6286,7 +6337,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 * directly from any kind of swap entries.
                 */
                if (absent ||
-                   __follow_hugetlb_must_fault(flags, pte, &unshare)) {
+                   __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
                        vm_fault_t ret;
                        unsigned int fault_flags = 0;
 
@@ -6296,9 +6347,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                fault_flags |= FAULT_FLAG_WRITE;
                        else if (unshare)
                                fault_flags |= FAULT_FLAG_UNSHARE;
-                       if (locked)
+                       if (locked) {
                                fault_flags |= FAULT_FLAG_ALLOW_RETRY |
                                        FAULT_FLAG_KILLABLE;
+                               if (flags & FOLL_INTERRUPTIBLE)
+                                       fault_flags |= FAULT_FLAG_INTERRUPTIBLE;
+                       }
                        if (flags & FOLL_NOWAIT)
                                fault_flags |= FAULT_FLAG_ALLOW_RETRY |
                                        FAULT_FLAG_RETRY_NOWAIT;
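
The new hugetlb_follow_page_mask() above becomes the single hugetlb entry point for follow_page()-style lookups, which is what allows the per-level __weak follow_huge_*() hooks to be deleted in the next hunk. A minimal sketch of how the generic walker is expected to hand hugetlb VMAs over to it; the real dispatch lives in follow_page_mask() in mm/gup.c, not in this file, and the function shown here is illustrative only:

        /* Sketch only: mirrors the expected mm/gup.c dispatch, not the actual code. */
        static struct page *follow_page_sketch(struct vm_area_struct *vma,
                                               unsigned long address,
                                               unsigned int flags)
        {
                if (is_vm_hugetlb_page(vma))
                        /* One call now covers every hugetlb page table level. */
                        return hugetlb_follow_page_mask(vma, address, flags);

                /* ... ordinary pgd/p4d/pud/pmd walk for non-hugetlb VMAs ... */
                return NULL;
        }
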
@@ -7214,123 +7268,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
  * These functions are overwritable if your architecture needs its own
  * behavior.
  */
-struct page * __weak
-follow_huge_addr(struct mm_struct *mm, unsigned long address,
-                             int write)
-{
-       return ERR_PTR(-EINVAL);
-}
-
-struct page * __weak
-follow_huge_pd(struct vm_area_struct *vma,
-              unsigned long address, hugepd_t hpd, int flags, int pdshift)
-{
-       WARN(1, "hugepd follow called with no support for hugepage directory format\n");
-       return NULL;
-}
-
-struct page * __weak
-follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
-{
-       struct hstate *h = hstate_vma(vma);
-       struct mm_struct *mm = vma->vm_mm;
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t *ptep, pte;
-
-       /*
-        * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
-        * follow_hugetlb_page().
-        */
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
-
-retry:
-       ptep = huge_pte_offset(mm, address, huge_page_size(h));
-       if (!ptep)
-               return NULL;
-
-       ptl = huge_pte_lock(h, mm, ptep);
-       pte = huge_ptep_get(ptep);
-       if (pte_present(pte)) {
-               page = pte_page(pte) +
-                       ((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
-               /*
-                * try_grab_page() should always be able to get the page here,
-                * because: a) we hold the pmd (ptl) lock, and b) we've just
-                * checked that the huge pmd (head) page is present in the
-                * page tables. The ptl prevents the head page and tail pages
-                * from being rearranged in any way. So this page must be
-                * available at this point, unless the page refcount
-                * overflowed:
-                */
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait_huge(ptep, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
-out:
-       spin_unlock(ptl);
-       return page;
-}
-
-struct page * __weak
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-               pud_t *pud, int flags)
-{
-       struct page *page = NULL;
-       spinlock_t *ptl;
-       pte_t pte;
-
-       if (WARN_ON_ONCE(flags & FOLL_PIN))
-               return NULL;
-
-retry:
-       ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
-       if (!pud_huge(*pud))
-               goto out;
-       pte = huge_ptep_get((pte_t *)pud);
-       if (pte_present(pte)) {
-               page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-               if (try_grab_page(page, flags)) {
-                       page = NULL;
-                       goto out;
-               }
-       } else {
-               if (is_hugetlb_entry_migration(pte)) {
-                       spin_unlock(ptl);
-                       __migration_entry_wait(mm, (pte_t *)pud, ptl);
-                       goto retry;
-               }
-               /*
-                * hwpoisoned entry is treated as no_page_table in
-                * follow_page_mask().
-                */
-       }
-out:
-       spin_unlock(ptl);
-       return page;
-}
-
-struct page * __weak
-follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
-{
-       if (flags & (FOLL_GET | FOLL_PIN))
-               return NULL;
-
-       return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
-}
-
 int isolate_hugetlb(struct page *page, struct list_head *list)
 {
        int ret = 0;
@@ -7349,7 +7286,7 @@ int isolate_hugetlb(struct page *page, struct list_head *list)
        return ret;
 }
 
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
        int ret = 0;
 
@@ -7359,7 +7296,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
                *hugetlb = true;
                if (HPageFreed(page))
                        ret = 0;
-               else if (HPageMigratable(page))
+               else if (HPageMigratable(page) || unpoison)
                        ret = get_page_unless_zero(page);
                else
                        ret = -EBUSY;
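
The new unpoison argument relaxes the refcount rule visible above: a hugetlb page that is in use and no longer HPageMigratable is normally refused with -EBUSY, but presumably the unpoison path still needs a reference on such a page. A hedged caller sketch derived only from the logic in this hunk; the real callers are in mm/memory-failure.c and may differ in detail:

        bool hugetlb = false;
        int ret;

        /*
         * Error-handling mode (unpoison == false): an in-use hugetlb page
         * that is no longer HPageMigratable comes back as -EBUSY.
         */
        ret = get_hwpoison_huge_page(page, &hugetlb, false);

        /*
         * Unpoison mode (unpoison == true): the HPageMigratable check is
         * skipped, so a reference can still be taken on such a page.
         */
        ret = get_hwpoison_huge_page(page, &hugetlb, true);
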
@@ -7368,12 +7305,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
        return ret;
 }
 
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+                               bool *migratable_cleared)
 {
        int ret;
 
        spin_lock_irq(&hugetlb_lock);
-       ret = __get_huge_page_for_hwpoison(pfn, flags);
+       ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
        spin_unlock_irq(&hugetlb_lock);
        return ret;
 }
@@ -7387,15 +7325,15 @@ void putback_active_hugepage(struct page *page)
        put_page(page);
 }
 
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
 {
-       struct hstate *h = page_hstate(oldpage);
+       struct hstate *h = folio_hstate(old_folio);
 
-       hugetlb_cgroup_migrate(oldpage, newpage);
-       set_page_owner_migrate_reason(newpage, reason);
+       hugetlb_cgroup_migrate(old_folio, new_folio);
+       set_page_owner_migrate_reason(&new_folio->page, reason);
 
        /*
-        * transfer temporary state of the new huge page. This is
+        * transfer temporary state of the new hugetlb folio. This is
         * reverse to other transitions because the newpage is going to
         * be final while the old one will be freed so it takes over
         * the temporary status.
@@ -7404,12 +7342,13 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
         * here as well otherwise the global surplus count will not match
         * the per-node's.
         */
-       if (HPageTemporary(newpage)) {
-               int old_nid = page_to_nid(oldpage);
-               int new_nid = page_to_nid(newpage);
+       if (folio_test_hugetlb_temporary(new_folio)) {
+               int old_nid = folio_nid(old_folio);
+               int new_nid = folio_nid(new_folio);
+
+               folio_set_hugetlb_temporary(old_folio);
+               folio_clear_hugetlb_temporary(new_folio);
 
-               SetHPageTemporary(oldpage);
-               ClearHPageTemporary(newpage);
 
                /*
                 * There is no need to transfer the per-node surplus state