khugepaged: skip collapse if uffd-wp detected
author Peter Xu <peterx@redhat.com>
Tue, 7 Apr 2020 03:06:04 +0000 (20:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 7 Apr 2020 17:43:39 +0000 (10:43 -0700)
Don't collapse into a huge PMD if any of the small PTEs are userfault
write-protected.  The problem is that write protection is tracked at
small-page granularity, and there's no way to keep all of this
write-protection information if the small pages are going to be merged
into a huge PMD.

The same thing needs to be considered for swap entries and migration
entries.  So do the check for those as well, regardless of
khugepaged_max_ptes_swap (i.e. even when the number of swap PTEs seen so
far is still within that limit).

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bobby Powers <bobbypowers@gmail.com>
Cc: Brian Geffon <bgeffon@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Denis Plotnikov <dplotnikov@virtuozzo.com>
Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Martin Cracauer <cracauer@cons.org>
Cc: Marty McFadden <mcfadden8@llnl.gov>
Cc: Maya Gokhale <gokhale2@llnl.gov>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Link: http://lkml.kernel.org/r/20200220163112.11409-12-peterx@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/trace/events/huge_memory.h
mm/khugepaged.c

index d82a0f4e824dd55e74a6791e81ee76a04c4c2d1a..70e32ff096ec0c2b84a24a4b7df00def3aba8b98 100644 (file)
@@ -13,6 +13,7 @@
        EM( SCAN_PMD_NULL,              "pmd_null")                     \
        EM( SCAN_EXCEED_NONE_PTE,       "exceed_none_pte")              \
        EM( SCAN_PTE_NON_PRESENT,       "pte_non_present")              \
+       EM( SCAN_PTE_UFFD_WP,           "pte_uffd_wp")                  \
        EM( SCAN_PAGE_RO,               "no_writable_page")             \
        EM( SCAN_LACK_REFERENCED_PAGE,  "lack_referenced_page")         \
        EM( SCAN_PAGE_NULL,             "page_null")                    \
index 3afc1e2d7a55d89a1df729fda47409b13c4a33bd..99d77ffb79c2b39b2cd1cc4dfd72c4bf1684f3d6 100644 (file)
@@ -29,6 +29,7 @@ enum scan_result {
        SCAN_PMD_NULL,
        SCAN_EXCEED_NONE_PTE,
        SCAN_PTE_NON_PRESENT,
+       SCAN_PTE_UFFD_WP,
        SCAN_PAGE_RO,
        SCAN_LACK_REFERENCED_PAGE,
        SCAN_PAGE_NULL,
@@ -1137,6 +1138,15 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                pte_t pteval = *_pte;
                if (is_swap_pte(pteval)) {
                        if (++unmapped <= khugepaged_max_ptes_swap) {
+                               /*
+                                * Always be strict with uffd-wp
+                                * enabled swap entries.  Please see
+                                * comment below for pte_uffd_wp().
+                                */
+                               if (pte_swp_uffd_wp(pteval)) {
+                                       result = SCAN_PTE_UFFD_WP;
+                                       goto out_unmap;
+                               }
                                continue;
                        } else {
                                result = SCAN_EXCEED_SWAP_PTE;
@@ -1156,6 +1166,19 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
                        result = SCAN_PTE_NON_PRESENT;
                        goto out_unmap;
                }
+               if (pte_uffd_wp(pteval)) {
+                       /*
+                        * Don't collapse the page if any of the small
+                        * PTEs are armed with uffd write protection.
+                        * Here we can also mark the new huge pmd as
+                        * write protected if any of the small ones is
+                        * marked but that could bring unknown
+                        * userfault messages that fall outside of
+                        * the registered range.  So, just be simple.
+                        */
+                       result = SCAN_PTE_UFFD_WP;
+                       goto out_unmap;
+               }
                if (pte_write(pteval))
                        writable = true;