powerpc, membarrier: Skip memory barrier in switch_mm()
author     Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
           Mon, 29 Jan 2018 20:20:11 +0000 (15:20 -0500)
committer  Ingo Molnar <mingo@kernel.org>
           Mon, 5 Feb 2018 20:34:02 +0000 (21:34 +0100)
Allow PowerPC to skip the full memory barrier in switch_mm(), and only
issue the barrier when scheduling into a task belonging to a process
that has registered to use private expedited membarrier commands.
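
Concretely, the arch hook added by this patch reduces to a conditional
barrier in switch_mm() (a condensed sketch of the new
arch/powerpc/include/asm/membarrier.h hunk below; see the diff for the
full comments):

  static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
                                               struct mm_struct *next,
                                               struct task_struct *tsk)
  {
          if (likely(!(atomic_read(&next->membarrier_state) &
                       MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
                  return;

          /* Full barrier after the rq->curr store, before return to user-space. */
          smp_mb();
  }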

Threads targeting the same VM but belonging to different thread groups
are a tricky case. This has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)
instead to skip the synchronize_sched() for cases where the VM only has
a single user, and that user only has a single thread.
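
As a result, the registration fast path boils down to the following check
(condensed from the kernel/sched/membarrier.c hunk at the end of this
patch):

  if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
          /*
           * Other users or threads of this VM may be running: wait for
           * them to observe the new membarrier state.
           */
          synchronize_sched();
  }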

It also turns out that we cannot use for_each_thread() to set thread
flags in all threads using a VM, as it only iterates over the thread
group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(), and
only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, which allows
private expedited membarrier commands to succeed.
membarrier_arch_switch_mm() now tests for the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.
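
Putting it together, the registration side orders its updates as follows
(a sketch of the flow implemented in the kernel/sched/membarrier.c hunk
below; single_user_single_thread is just shorthand here for the
mm_users / get_nr_threads() test described above):

  atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
  if (!single_user_single_thread)      /* shorthand, see check above */
          synchronize_sched();         /* every CPU now sees the flag in switch_mm() */
  atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, &mm->membarrier_state);
  /* only now can MEMBARRIER_CMD_PRIVATE_EXPEDITED succeed */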

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Parri <parri.andrea@gmail.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Avi Kivity <avi@scylladb.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: David Sehr <sehr@google.com>
Cc: Greg Hackmann <ghackmann@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maged Michael <maged.michael@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
MAINTAINERS
arch/powerpc/Kconfig
arch/powerpc/include/asm/membarrier.h [new file with mode: 0644]
arch/powerpc/mm/mmu_context.c
include/linux/sched/mm.h
init/Kconfig
kernel/sched/core.c
kernel/sched/membarrier.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 217a8759e897e951b927eb2aac76b60247e351d9..8e96d4e9677bd889f981d69da185c08e7e6c248e 100644
@@ -8944,6 +8944,7 @@ L:        linux-kernel@vger.kernel.org
 S:     Supported
 F:     kernel/sched/membarrier.c
 F:     include/uapi/linux/membarrier.h
+F:     arch/powerpc/include/asm/membarrier.h
 
 MEMORY MANAGEMENT
 L:     linux-mm@kvack.org
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ed525a44734cfaa62ba7125fab11225c1778a7a..a2380de50878abf37b6089db816400d6847332d9 100644
@@ -140,6 +140,7 @@ config PPC
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_PMEM_API                if PPC64
+       select ARCH_HAS_MEMBARRIER_CALLBACKS
        select ARCH_HAS_SCALED_CPUTIME          if VIRT_CPU_ACCOUNTING_NATIVE
        select ARCH_HAS_SG_CHAIN
        select ARCH_HAS_TICK_BROADCAST          if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
new file mode 100644
index 0000000..98ff4f1
--- /dev/null
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+                                            struct mm_struct *next,
+                                            struct task_struct *tsk)
+{
+       /*
+        * Only need the full barrier when switching between processes.
+        * Barrier when switching from kernel to userspace is not
+        * required here, given that it is implied by mmdrop(). Barrier
+        * when switching from userspace to kernel is not needed after
+        * store to rq->curr.
+        */
+       if (likely(!(atomic_read(&next->membarrier_state) &
+                    MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
+               return;
+
+       /*
+        * The membarrier system call requires a full memory barrier
+        * after storing to rq->curr, before going back to user-space.
+        */
+       smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index d60a62bf4fc763bd52008fff96fcbe9aca70b68a..0ab297c4cfad1486a54195022929ee022d75b964 100644
@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 
@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                 *
                 * On the read side the barrier is in pte_xchg(), which orders
                 * the store to the PTE vs the load of mm_cpumask.
+                *
+                * This full barrier is needed by membarrier when switching
+                * between processes after store to rq->curr, before user-space
+                * memory accesses.
                 */
                smp_mb();
 
@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
        if (new_on_cpu)
                radix_kvm_prefetch_workaround(next);
+       else
+               membarrier_arch_switch_mm(prev, next, tsk);
 
        /*
         * The actual HW switching method differs between the various
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3d49b91b674d75640d9eebdb0bff240434e2a6dd..26307cdc3969974b574621191cb0d38a9d93d89d 100644
@@ -215,14 +215,25 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 #ifdef CONFIG_MEMBARRIER
 enum {
        MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY        = (1U << 0),
-       MEMBARRIER_STATE_SWITCH_MM                      = (1U << 1),
+       MEMBARRIER_STATE_PRIVATE_EXPEDITED              = (1U << 1),
 };
 
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+#include <asm/membarrier.h>
+#endif
+
 static inline void membarrier_execve(struct task_struct *t)
 {
        atomic_set(&t->mm->membarrier_state, 0);
 }
 #else
+#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+                                            struct mm_struct *next,
+                                            struct task_struct *tsk)
+{
+}
+#endif
 static inline void membarrier_execve(struct task_struct *t)
 {
 }
diff --git a/init/Kconfig b/init/Kconfig
index a9a2e2c86671a18c02c49643151204dd3358157b..837adcf075d9dd0f598c19492a4a47e38b121105 100644
@@ -1412,6 +1412,9 @@ config USERFAULTFD
          Enable the userfaultfd() system call that allows to intercept and
          handle page faults in userland.
 
+config ARCH_HAS_MEMBARRIER_CALLBACKS
+       bool
+
 config EMBEDDED
        bool "Embedded system"
        option allnoconfig_y
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3da7a2444a911131589ce616147efd517c7bd354..ead0c2135d470d4a39d2f886672738fc619936bc 100644
@@ -2698,16 +2698,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
        prev_state = prev->state;
        vtime_task_switch(prev);
        perf_event_task_sched_in(prev, current);
-       /*
-        * The membarrier system call requires a full memory barrier
-        * after storing to rq->curr, before going back to user-space.
-        *
-        * TODO: This smp_mb__after_unlock_lock can go away if PPC end
-        * up adding a full barrier to switch_mm(), or we should figure
-        * out if a smp_mb__after_unlock_lock is really the proper API
-        * to use.
-        */
-       smp_mb__after_unlock_lock();
        finish_task(prev);
        finish_lock_switch(rq);
        finish_arch_post_lock_switch();
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 9bcbacba82a8115ddceda7fb26097a23c50d44f5..678577267a9ac5228ed9327b6d9e283ac37a8530 100644
@@ -118,6 +118,14 @@ static void membarrier_register_private_expedited(void)
        if (atomic_read(&mm->membarrier_state)
                        & MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY)
                return;
+       atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
+       if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
+               /*
+                * Ensure all future scheduler executions will observe the
+                * new thread flag state for this process.
+                */
+               synchronize_sched();
+       }
        atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
                        &mm->membarrier_state);
 }