Merge tag 'mm-stable-2024-05-17-19-19' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9e62a00b46ddee5899f85cfc252dabd7c0d04121..16ada4fb02b799517fb65c93c833b4920f781974 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -141,7 +141,6 @@ static struct ctl_table memory_failure_table[] = {
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
-       { }
 };
 
 /*
@@ -156,7 +155,7 @@ static int __page_handle_poison(struct page *page)
 
        /*
         * zone_pcp_disable() can't be used here. It will
-        * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
+        * hold pcp_batch_high_lock and dissolve_free_hugetlb_folio() might hold
         * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
         * optimization is enabled. This will break current lock dependency
         * chain and leads to deadlock.
@@ -166,7 +165,7 @@ static int __page_handle_poison(struct page *page)
         * but nothing guarantees that those pages do not get back to a PCP
         * queue if we need to refill those.
         */
-       ret = dissolve_free_huge_page(page);
+       ret = dissolve_free_hugetlb_folio(page_folio(page));
        if (!ret) {
                drain_all_pages(page_zone(page));
                ret = take_page_off_buddy(page);
@@ -179,8 +178,8 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
 {
        if (hugepage_or_freepage) {
                /*
-                * Doing this check for free pages is also fine since dissolve_free_huge_page
-                * returns 0 for non-hugetlb pages as well.
+                * Doing this check for free pages is also fine since
+                * dissolve_free_hugetlb_folio() returns 0 for non-hugetlb folios as well.
                 */
                if (__page_handle_poison(page) <= 0)
                        /*
@@ -217,6 +216,7 @@ EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);
 
 static int hwpoison_filter_dev(struct page *p)
 {
+       struct folio *folio = page_folio(p);
        struct address_space *mapping;
        dev_t dev;
 
@@ -224,7 +224,7 @@ static int hwpoison_filter_dev(struct page *p)
            hwpoison_filter_dev_minor == ~0U)
                return 0;
 
-       mapping = page_mapping(p);
+       mapping = folio_mapping(folio);
        if (mapping == NULL || mapping->host == NULL)
                return -EINVAL;
 
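
The page_mapping() to folio_mapping() switch above is the conversion idiom used throughout this patch: resolve the folio once with page_folio(), then use folio_* accessors rather than the legacy page_* wrappers. A minimal sketch of the idiom as it would look in this file (illustrative only, not code from the patch; the helper name is invented):

static struct address_space *example_mapping_of(struct page *p)
{
	struct folio *folio = page_folio(p);	/* a tail page resolves to its head folio */

	return folio_mapping(folio);		/* NULL for anon and slab folios */
}
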
@@ -370,20 +370,25 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
  * Unknown page type encountered. Try to check whether it can turn PageLRU by
  * lru_add_drain_all.
  */
-void shake_page(struct page *p)
+void shake_folio(struct folio *folio)
 {
-       if (PageHuge(p))
+       if (folio_test_hugetlb(folio))
                return;
        /*
         * TODO: Could shrink slab caches here if a lightweight range-based
         * shrinker will be available.
         */
-       if (PageSlab(p))
+       if (folio_test_slab(folio))
                return;
 
        lru_add_drain_all();
 }
-EXPORT_SYMBOL_GPL(shake_page);
+EXPORT_SYMBOL_GPL(shake_folio);
+
+static void shake_page(struct page *page)
+{
+       shake_folio(page_folio(page));
+}
 
 static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
                unsigned long address)
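
shake_folio() takes the folio directly because the hugetlb and slab checks are per-folio properties, and callers that already hold a folio (as memory_failure() does later in this patch) avoid repeated compound_head() lookups; the static shake_page() wrapper above only serves the remaining page-based call sites in this file. The intended usage pattern looks roughly like this (illustrative sketch, not code from the patch; the helper name is invented):

static bool example_try_make_lru(struct folio *folio)
{
	if (folio_test_lru(folio))
		return true;

	/* lru_add_drain_all() unless the folio is hugetlb or slab */
	shake_folio(folio);
	return folio_test_lru(folio);
}
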
@@ -428,21 +433,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
  * not much we can do. We just print a message and ignore otherwise.
  */
 
-#define FSDAX_INVALID_PGOFF ULONG_MAX
-
 /*
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- *
- * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
- * filesystem with a memory failure handler has claimed the
- * memory_failure event. In all other cases, page->index and
- * page->mapping are sufficient for mapping the page back to its
- * corresponding user virtual address.
  */
 static void __add_to_kill(struct task_struct *tsk, struct page *p,
                          struct vm_area_struct *vma, struct list_head *to_kill,
-                         unsigned long ksm_addr, pgoff_t fsdax_pgoff)
+                         unsigned long addr)
 {
        struct to_kill *tk;
 
@@ -452,12 +449,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
                return;
        }
 
-       tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
-       if (is_zone_device_page(p)) {
-               if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
-                       tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+       tk->addr = addr;
+       if (is_zone_device_page(p))
                tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
-       } else
+       else
                tk->size_shift = page_shift(compound_head(p));
 
        /*
@@ -484,10 +479,12 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
 }
 
 static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
-                                 struct vm_area_struct *vma,
-                                 struct list_head *to_kill)
+               struct vm_area_struct *vma, struct list_head *to_kill,
+               unsigned long addr)
 {
-       __add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+       if (addr == -EFAULT)
+               return;
+       __add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
 #ifdef CONFIG_KSM
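
__add_to_kill() no longer looks up the user address itself, and the fsdax pgoff special case is gone: every caller now passes a precomputed address, and add_to_kill_anon_file() quietly drops entries whose lookup failed with -EFAULT. The expected caller pattern is roughly the following (illustrative sketch; example_queue_kill() is a made-up name):

static void example_queue_kill(struct task_struct *tsk, struct page *p,
			       struct vm_area_struct *vma,
			       struct list_head *to_kill)
{
	/* page_address_in_vma() yields -EFAULT when p is not mapped in vma */
	unsigned long addr = page_address_in_vma(p, vma);

	add_to_kill_anon_file(tsk, p, vma, to_kill, addr);
}
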
@@ -503,12 +500,13 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
 
        return false;
 }
+
 void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
                     struct vm_area_struct *vma, struct list_head *to_kill,
-                    unsigned long ksm_addr)
+                    unsigned long addr)
 {
        if (!task_in_to_kill_list(to_kill, tsk))
-               __add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+               __add_to_kill(tsk, p, vma, to_kill, addr);
 }
 #endif
 /*
@@ -610,7 +608,6 @@ struct task_struct *task_early_kill(struct task_struct *tsk, int force_early)
 static void collect_procs_anon(struct folio *folio, struct page *page,
                struct list_head *to_kill, int force_early)
 {
-       struct vm_area_struct *vma;
        struct task_struct *tsk;
        struct anon_vma *av;
        pgoff_t pgoff;
@@ -622,8 +619,10 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
        pgoff = page_to_pgoff(page);
        rcu_read_lock();
        for_each_process(tsk) {
+               struct vm_area_struct *vma;
                struct anon_vma_chain *vmac;
                struct task_struct *t = task_early_kill(tsk, force_early);
+               unsigned long addr;
 
                if (!t)
                        continue;
@@ -632,9 +631,8 @@ static void collect_procs_anon(struct folio *folio, struct page *page,
                        vma = vmac->vma;
                        if (vma->vm_mm != t->mm)
                                continue;
-                       if (!page_mapped_in_vma(page, vma))
-                               continue;
-                       add_to_kill_anon_file(t, page, vma, to_kill);
+                       addr = page_mapped_in_vma(page, vma);
+                       add_to_kill_anon_file(t, page, vma, to_kill, addr);
                }
        }
        rcu_read_unlock();
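
Dropping the boolean page_mapped_in_vma() check works because that helper now returns the mapped user address, or -EFAULT when the page is not mapped in the VMA (a change made elsewhere in this series); unmapped VMAs are therefore filtered inside add_to_kill_anon_file() instead of with a continue here. Assuming that return-value change, the loop body amounts to:

	addr = page_mapped_in_vma(page, vma);		/* user address, or -EFAULT */
	add_to_kill_anon_file(t, page, vma, to_kill, addr);	/* ignores -EFAULT */
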
@@ -657,6 +655,7 @@ static void collect_procs_file(struct folio *folio, struct page *page,
        pgoff = page_to_pgoff(page);
        for_each_process(tsk) {
                struct task_struct *t = task_early_kill(tsk, force_early);
+               unsigned long addr;
 
                if (!t)
                        continue;
@@ -669,8 +668,10 @@ static void collect_procs_file(struct folio *folio, struct page *page,
                         * Assume applications who requested early kill want
                         * to be informed of all such data corruptions.
                         */
-                       if (vma->vm_mm == t->mm)
-                               add_to_kill_anon_file(t, page, vma, to_kill);
+                       if (vma->vm_mm != t->mm)
+                               continue;
+                       addr = page_address_in_vma(page, vma);
+                       add_to_kill_anon_file(t, page, vma, to_kill, addr);
                }
        }
        rcu_read_unlock();
@@ -682,7 +683,8 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
                              struct vm_area_struct *vma,
                              struct list_head *to_kill, pgoff_t pgoff)
 {
-       __add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+       unsigned long addr = vma_address(vma, pgoff, 1);
+       __add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
 /*
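
add_to_kill_fsdax() now resolves the user address from the file offset up front with vma_address(vma, pgoff, 1) instead of threading the pgoff down to __add_to_kill(). For a single page, that helper boils down to the arithmetic below (simplified sketch that skips the bounds checks the real helper performs; the function name is invented):

static unsigned long example_vma_address(struct vm_area_struct *vma, pgoff_t pgoff)
{
	/* offset of pgoff from the VMA's starting file offset, in bytes,
	 * added to the VMA's starting user address */
	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}
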
@@ -727,9 +729,9 @@ static void collect_procs(struct folio *folio, struct page *page,
 {
        if (!folio->mapping)
                return;
-       if (unlikely(PageKsm(page)))
-               collect_procs_ksm(page, tokill, force_early);
-       else if (PageAnon(page))
+       if (unlikely(folio_test_ksm(folio)))
+               collect_procs_ksm(folio, page, tokill, force_early);
+       else if (folio_test_anon(folio))
                collect_procs_anon(folio, page, tokill, force_early);
        else
                collect_procs_file(folio, page, tokill, force_early);
@@ -1089,7 +1091,8 @@ out:
  */
 static int me_pagecache_dirty(struct page_state *ps, struct page *p)
 {
-       struct address_space *mapping = page_mapping(p);
+       struct folio *folio = page_folio(p);
+       struct address_space *mapping = folio_mapping(folio);
 
        SetPageError(p);
        /* TBD: print more information about the file. */
@@ -1251,7 +1254,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
 #define mlock          (1UL << PG_mlocked)
 #define lru            (1UL << PG_lru)
 #define head           (1UL << PG_head)
-#define slab           (1UL << PG_slab)
 #define reserved       (1UL << PG_reserved)
 
 static struct page_state error_states[] = {
@@ -1261,13 +1263,6 @@ static struct page_state error_states[] = {
         * PG_buddy pages only make a small fraction of all free pages.
         */
 
-       /*
-        * Could in theory check if slab page is free or if we can drop
-        * currently unused objects without touching them. But just
-        * treat it as standard kernel for now.
-        */
-       { slab,         slab,           MF_MSG_SLAB,    me_kernel },
-
        { head,         head,           MF_MSG_HUGE,            me_huge_page },
 
        { sc|dirty,     sc|dirty,       MF_MSG_DIRTY_SWAPCACHE, me_swapcache_dirty },
@@ -1294,7 +1289,6 @@ static struct page_state error_states[] = {
 #undef mlock
 #undef lru
 #undef head
-#undef slab
 #undef reserved
 
 static void update_per_node_mf_stats(unsigned long pfn,
@@ -1567,24 +1561,24 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
  * Do all that is necessary to remove user space mappings. Unmap
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
-static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
-                                 int flags, struct page *hpage)
+static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
+               unsigned long pfn, int flags)
 {
-       struct folio *folio = page_folio(hpage);
        enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
        struct address_space *mapping;
        LIST_HEAD(tokill);
        bool unmap_success;
        int forcekill;
-       bool mlocked = PageMlocked(hpage);
+       bool mlocked = folio_test_mlocked(folio);
 
        /*
         * Here we are interested only in user-mapped pages, so skip any
         * other types of pages.
         */
-       if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
+       if (folio_test_reserved(folio) || folio_test_slab(folio) ||
+           folio_test_pgtable(folio) || folio_test_offline(folio))
                return true;
-       if (!(PageLRU(hpage) || PageHuge(p)))
+       if (!(folio_test_lru(folio) || folio_test_hugetlb(folio)))
                return true;
 
        /*
@@ -1594,7 +1588,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
        if (!page_mapped(p))
                return true;
 
-       if (PageSwapCache(p)) {
+       if (folio_test_swapcache(folio)) {
                pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
                ttu &= ~TTU_HWPOISON;
        }
@@ -1605,11 +1599,11 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * XXX: the dirty test could be racy: set_page_dirty() may not always
         * be called inside page lock (it's recommended but not enforced).
         */
-       mapping = page_mapping(hpage);
-       if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
+       mapping = folio_mapping(folio);
+       if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
            mapping_can_writeback(mapping)) {
-               if (page_mkclean(hpage)) {
-                       SetPageDirty(hpage);
+               if (folio_mkclean(folio)) {
+                       folio_set_dirty(folio);
                } else {
                        ttu &= ~TTU_HWPOISON;
                        pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
@@ -1624,7 +1618,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         */
        collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
 
-       if (PageHuge(hpage) && !PageAnon(hpage)) {
+       if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
                /*
                 * For hugetlb pages in shared mappings, try_to_unmap
                 * could potentially call huge_pmd_unshare.  Because of
@@ -1632,7 +1626,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
                 * TTU_RMAP_LOCKED to indicate we have taken the lock
                 * at this higher level.
                 */
-               mapping = hugetlb_page_mapping_lock_write(hpage);
+               mapping = hugetlb_folio_mapping_lock_write(folio);
                if (mapping) {
                        try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
                        i_mmap_unlock_write(mapping);
@@ -1644,15 +1638,15 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 
        unmap_success = !page_mapped(p);
        if (!unmap_success)
-               pr_err("%#lx: failed to unmap page (mapcount=%d)\n",
-                      pfn, page_mapcount(p));
+               pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n",
+                      pfn, folio_mapcount(page_folio(p)));
 
        /*
         * try_to_unmap() might put mlocked page in lru cache, so call
         * shake_page() again to ensure that it's flushed.
         */
        if (mlocked)
-               shake_page(hpage);
+               shake_folio(folio);
 
        /*
         * Now that the dirty bit has been propagated to the
@@ -1664,7 +1658,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * use a more force-full uncatchable kill to prevent
         * any accesses to the poisoned memory.
         */
-       forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL) ||
+       forcekill = folio_test_dirty(folio) || (flags & MF_MUST_KILL) ||
                    !unmap_success;
        kill_procs(&tokill, forcekill, !unmap_success, pfn, flags);
 
@@ -2108,7 +2102,7 @@ retry:
 
        page_flags = folio->flags;
 
-       if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) {
+       if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
                folio_unlock(folio);
                return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
        }
@@ -2197,7 +2191,7 @@ out:
 int memory_failure(unsigned long pfn, int flags)
 {
        struct page *p;
-       struct page *hpage;
+       struct folio *folio;
        struct dev_pagemap *pgmap;
        int res = 0;
        unsigned long page_flags;
@@ -2285,8 +2279,8 @@ try_again:
                }
        }
 
-       hpage = compound_head(p);
-       if (PageTransHuge(hpage)) {
+       folio = page_folio(p);
+       if (folio_test_large(folio)) {
                /*
                 * The flag must be set after the refcount is bumped
                 * otherwise it may race with THP split.
@@ -2300,12 +2294,13 @@ try_again:
                 * or unhandlable page.  The refcount is bumped iff the
                 * page is a valid handlable page.
                 */
-               SetPageHasHWPoisoned(hpage);
+               folio_set_has_hwpoisoned(folio);
                if (try_to_split_thp_page(p) < 0) {
                        res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
                        goto unlock_mutex;
                }
                VM_BUG_ON_PAGE(!page_count(p), p);
+               folio = page_folio(p);
        }
 
        /*
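
The added "folio = page_folio(p);" after a successful split matters because splitting destroys the old large folio: each former subpage becomes its own order-0 folio, so the folio pointer captured before the split must not be reused. A sketch of the pattern (illustrative, mirrors the hunk above; example_split_and_refetch() is a made-up helper):

static struct folio *example_split_and_refetch(struct page *p)
{
	if (try_to_split_thp_page(p) < 0)
		return NULL;		/* split failed; the original large folio is unchanged */

	/* p now belongs to a fresh order-0 folio, so look it up again */
	return page_folio(p);
}
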
@@ -2316,9 +2311,9 @@ try_again:
         * The check (unnecessarily) ignores LRU pages being isolated and
         * walked by the page reclaim code, however that's not a big loss.
         */
-       shake_page(p);
+       shake_folio(folio);
 
-       lock_page(p);
+       folio_lock(folio);
 
        /*
         * We're only intended to deal with the non-Compound page here.
@@ -2326,11 +2321,11 @@ try_again:
         * race window. If this happens, we could try again to hopefully
         * handle the page next round.
         */
-       if (PageCompound(p)) {
+       if (folio_test_large(folio)) {
                if (retry) {
                        ClearPageHWPoison(p);
-                       unlock_page(p);
-                       put_page(p);
+                       folio_unlock(folio);
+                       folio_put(folio);
                        flags &= ~MF_COUNT_INCREASED;
                        retry = false;
                        goto try_again;
@@ -2346,35 +2341,35 @@ try_again:
         * folio_remove_rmap_*() in try_to_unmap_one(). So to determine page
         * status correctly, we save a copy of the page flags at this time.
         */
-       page_flags = p->flags;
+       page_flags = folio->flags;
 
        if (hwpoison_filter(p)) {
                ClearPageHWPoison(p);
-               unlock_page(p);
-               put_page(p);
+               folio_unlock(folio);
+               folio_put(folio);
                res = -EOPNOTSUPP;
                goto unlock_mutex;
        }
 
        /*
-        * __munlock_folio() may clear a writeback page's LRU flag without
-        * page_lock. We need wait writeback completion for this page or it
-        * may trigger vfs BUG while evict inode.
+        * __munlock_folio() may clear a writeback folio's LRU flag without
+        * the folio lock. We need to wait for writeback completion for this
+        * folio or it may trigger a vfs BUG while evicting the inode.
         */
-       if (!PageLRU(p) && !PageWriteback(p))
+       if (!folio_test_lru(folio) && !folio_test_writeback(folio))
                goto identify_page_state;
 
        /*
         * It's very difficult to mess with pages currently under IO
         * and in many cases impossible, so we just avoid it here.
         */
-       wait_on_page_writeback(p);
+       folio_wait_writeback(folio);
 
        /*
         * Now take care of user space mappings.
         * Abort on fail: __filemap_remove_folio() assumes unmapped page.
         */
-       if (!hwpoison_user_mappings(p, pfn, flags, p)) {
+       if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
                res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
                goto unlock_page;
        }
@@ -2382,7 +2377,8 @@ try_again:
        /*
         * Torn down by someone else?
         */
-       if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+       if (folio_test_lru(folio) && !folio_test_swapcache(folio) &&
+           folio->mapping == NULL) {
                res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
                goto unlock_page;
        }
@@ -2392,7 +2388,7 @@ identify_page_state:
        mutex_unlock(&mf_mutex);
        return res;
 unlock_page:
-       unlock_page(p);
+       folio_unlock(folio);
 unlock_mutex:
        mutex_unlock(&mf_mutex);
        return res;
@@ -2562,8 +2558,8 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) ||
-           folio_test_reserved(folio) || PageOffline(&folio->page))
+       if (folio_test_slab(folio) || folio_test_pgtable(folio) ||
+           folio_test_reserved(folio) || folio_test_offline(folio))
                goto unlock_mutex;
 
        /*
@@ -2584,7 +2580,7 @@ int unpoison_memory(unsigned long pfn)
 
        ghp = get_hwpoison_page(p, MF_UNPOISON);
        if (!ghp) {
-               if (PageHuge(p)) {
+               if (folio_test_hugetlb(folio)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
                        if (count == 0)
@@ -2600,7 +2596,7 @@ int unpoison_memory(unsigned long pfn)
                                         pfn, &unpoison_rs);
                }
        } else {
-               if (PageHuge(p)) {
+               if (folio_test_hugetlb(folio)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
                        if (count == 0) {
@@ -2678,6 +2674,7 @@ static int soft_offline_in_use_page(struct page *page)
        struct migration_target_control mtc = {
                .nid = NUMA_NO_NODE,
                .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+               .reason = MR_MEMORY_FAILURE,
        };
 
        if (!huge && folio_test_large(folio)) {