mm/compaction: Disable compact_unevictable_allowed on RT
author Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Thu, 2 Apr 2020 04:10:42 +0000 (21:10 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 2 Apr 2020 16:35:31 +0000 (09:35 -0700)
Since commit 5bbe3547aa3ba ("mm: allow compaction of unevictable pages")
it is allowed to examine mlocked pages and compact them by default.  On
-RT even minor pagefaults are problematic because it may take a few 100us
to resolve them and until then the task is blocked.

Make compact_unevictable_allowed = 0 default and issue a warning on RT if
it is changed.

[bigeasy@linutronix.de: v5]
Link: https://lore.kernel.org/linux-mm/20190710144138.qyn4tuttdq6h7kqx@linutronix.de/
Link: http://lkml.kernel.org/r/20200319165536.ovi75tsr2seared4@linutronix.de
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Iurii Zaikin <yzaikin@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: https://lore.kernel.org/linux-mm/20190710144138.qyn4tuttdq6h7kqx@linutronix.de/
Link: http://lkml.kernel.org/r/20200303202225.nhqc3v5gwlb7x6et@linutronix.de
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Documentation/admin-guide/sysctl/vm.rst
kernel/sysctl.c
mm/compaction.c

index 64aeee1009cab2265d7832cf0694e03b43ce9559..0329a4d3fa9ecf89046d32086372394b070d1681 100644 (file)
@@ -128,6 +128,9 @@ allowed to examine the unevictable lru (mlocked pages) for pages to compact.
 This should be used on systems where stalls for minor page faults are an
 acceptable trade for large contiguous free memory.  Set to 0 to prevent
 compaction from moving pages that are unevictable.  Default value is 1.
+On CONFIG_PREEMPT_RT the default value is 0 in order to avoid a page fault, due
+to compaction, which would block the task from becoming active until the fault
+is resolved.
 
 
 dirty_background_bytes
index cb650bb9da68e92637fb55805a610ad77a31a641..8a176d8727a3ab8e12c4b48fe9ff7c3afdd97465 100644 (file)
@@ -212,6 +212,11 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
 static int proc_taint(struct ctl_table *table, int write,
                               void __user *buffer, size_t *lenp, loff_t *ppos);
+#ifdef CONFIG_COMPACTION
+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
+                                              int write, void __user *buffer,
+                                              size_t *lenp, loff_t *ppos);
+#endif
 #endif
 
 #ifdef CONFIG_PRINTK
@@ -1467,7 +1472,7 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_compact_unevictable_allowed,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = proc_dointvec_minmax_warn_RT_change,
                .extra1         = SYSCTL_ZERO,
                .extra2         = SYSCTL_ONE,
        },
@@ -2555,6 +2560,28 @@ int proc_dointvec(struct ctl_table *table, int write,
        return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
 }
 
+#ifdef CONFIG_COMPACTION
+static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
+                                              int write, void __user *buffer,
+                                              size_t *lenp, loff_t *ppos)
+{
+       int ret, old;
+
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write)
+               return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+       old = *(int *)table->data;
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret)
+               return ret;
+       if (old != *(int *)table->data)
+               pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
+                            table->procname, current->comm,
+                            task_pid_nr(current));
+       return ret;
+}
+#endif
+
 /**
  * proc_douintvec - read a vector of unsigned integers
  * @table: the sysctl table
index 07947387244a0da9a29f805b65db2cb136bb934c..c589ead54fb3da2959e024d10843e0aba76bd8d0 100644 (file)
@@ -1594,7 +1594,11 @@ typedef enum {
  * Allow userspace to control policy on scanning the unevictable LRU for
  * compactable pages.
  */
+#ifdef CONFIG_PREEMPT_RT
+int sysctl_compact_unevictable_allowed __read_mostly = 0;
+#else
 int sysctl_compact_unevictable_allowed __read_mostly = 1;
+#endif
 
 static inline void
 update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)