mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Thu, 6 Aug 2015 22:47:08 +0000 (15:47 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Aug 2015 01:39:42 +0000 (04:39 +0300)
The race condition addressed in commit add05cecef80 ("mm: soft-offline:
don't free target page in successful page migration") was not closed
completely, because that can happen not only for soft-offline, but also
for hard-offline.  Consider that a slab page is about to be freed into
buddy pool, and then an uncorrected memory error hits the page just
after entering __free_one_page(), then VM_BUG_ON_PAGE(page->flags &
PAGE_FLAGS_CHECK_AT_PREP) is triggered, despite the fact that it's not
necessary because the data on the affected page is not consumed.

To solve it, this patch drops __PG_HWPOISON from page flag checks at
allocation/free time.  I think it's justified because __PG_HWPOISON
flags is defined to prevent the page from being reused, and setting it
outside the page's alloc-free cycle is a designed behavior (not a bug.)

For recent months, I was annoyed about BUG_ON when soft-offlined page
remains on lru cache list for a while, which is avoided by calling
put_page() instead of putback_lru_page() in page migration's success
path.  This means that this patch reverts a major change from commit
add05cecef80 about the new refcounting rule of soft-offlined pages, so
"reuse window" revives.  This will be closed by a subsequent patch.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Dean Nelson <dnelson@redhat.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/page-flags.h
mm/huge_memory.c
mm/migrate.c
mm/page_alloc.c

index f34e040b34e9ffbf0abe66b439571ec8902440ca..41c93844fb1d1ed5c0dbad77fe5a409557d66067 100644 (file)
@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
         1 << PG_private | 1 << PG_private_2 | \
         1 << PG_writeback | 1 << PG_reserved | \
         1 << PG_slab    | 1 << PG_swapcache | 1 << PG_active | \
-        1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+        1 << PG_unevictable | __PG_MLOCKED | \
         __PG_COMPOUND_LOCK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have any flags set.  It they are set,
+ * Pages being prepped should not have these flags set.  It they are set,
  * there has been a kernel bug or struct page corruption.
+ *
+ * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
+ * alloc-free cycle to prevent from reusing the page.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP       ((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_CHECK_AT_PREP       \
+       (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
 
 #define PAGE_FLAGS_PRIVATE                             \
        (1 << PG_private | 1 << PG_private_2)
index c107094f79bae9ee895bd6bf30976d900f16c141..097c7a4bfbd9f13f4845acae80d73aa7b0e66fb2 100644 (file)
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
                /* after clearing PageTail the gup refcount can be released */
                smp_mb__after_atomic();
 
-               /*
-                * retain hwpoison flag of the poisoned tail page:
-                *   fix for the unsuitable process killed on Guest Machine(KVM)
-                *   by the memory-failure.
-                */
-               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
                page_tail->flags |= (page->flags &
                                     ((1L << PG_referenced) |
                                      (1L << PG_swapbacked) |
index ee401e4e5ef187c92247d03dd6d2ea0893092d1c..f2415be7d93ba80f1b502fad1876221817c0b32c 100644 (file)
@@ -950,7 +950,10 @@ out:
                list_del(&page->lru);
                dec_zone_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (reason != MR_MEMORY_FAILURE)
+               /* Soft-offlined page shouldn't go through lru cache list */
+               if (reason == MR_MEMORY_FAILURE)
+                       put_page(page);
+               else
                        putback_lru_page(page);
        }
 
index cb61f44eb3fc4420406d72a04ad168b55524c47f..beda4171080232f37e779a3658045e1f813a1cff 100644 (file)
@@ -1296,6 +1296,10 @@ static inline int check_new_page(struct page *page)
                bad_reason = "non-NULL mapping";
        if (unlikely(atomic_read(&page->_count) != 0))
                bad_reason = "nonzero _count";
+       if (unlikely(page->flags & __PG_HWPOISON)) {
+               bad_reason = "HWPoisoned (hardware-corrupted)";
+               bad_flags = __PG_HWPOISON;
+       }
        if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
                bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
                bad_flags = PAGE_FLAGS_CHECK_AT_PREP;