sched/core: Fix DEBUG_SPINLOCK annotation for rq->lock

[sfrench/cifs-2.6.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 3da7a2444a911131589ce616147efd517c7bd354..e7c535eee0a6d493a2a43eba210c08c6858b63d1 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1630,16 +1630,16 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
  
  #ifdef CONFIG_SMP
         if (cpu == rq->cpu) {
-               schedstat_inc(rq->ttwu_local);
-               schedstat_inc(p->se.statistics.nr_wakeups_local);
+               __schedstat_inc(rq->ttwu_local);
+               __schedstat_inc(p->se.statistics.nr_wakeups_local);
         } else {
                 struct sched_domain *sd;
  
-               schedstat_inc(p->se.statistics.nr_wakeups_remote);
+               __schedstat_inc(p->se.statistics.nr_wakeups_remote);
                 rcu_read_lock();
                 for_each_domain(rq->cpu, sd) {
                         if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-                               schedstat_inc(sd->ttwu_wake_remote);
+                               __schedstat_inc(sd->ttwu_wake_remote);
                                 break;
                         }
                 }
@@ -1647,14 +1647,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
         }
  
         if (wake_flags & WF_MIGRATED)
-               schedstat_inc(p->se.statistics.nr_wakeups_migrate);
+               __schedstat_inc(p->se.statistics.nr_wakeups_migrate);
  #endif /* CONFIG_SMP */
  
-       schedstat_inc(rq->ttwu_count);
-       schedstat_inc(p->se.statistics.nr_wakeups);
+       __schedstat_inc(rq->ttwu_count);
+       __schedstat_inc(p->se.statistics.nr_wakeups);
  
         if (wake_flags & WF_SYNC)
-               schedstat_inc(p->se.statistics.nr_wakeups_sync);
+               __schedstat_inc(p->se.statistics.nr_wakeups_sync);
  }
  
  static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2461,6 +2461,7 @@ void wake_up_new_task(struct task_struct *p)
          * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq,
          * as we're not fully set-up yet.
          */
+       p->recent_used_cpu = task_cpu(p);
         __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
  #endif
         rq = __task_rq_lock(p, &rf);
@@ -2600,19 +2601,31 @@ static inline void finish_task(struct task_struct *prev)
  #endif
  }
  
-static inline void finish_lock_switch(struct rq *rq)
+static inline void
+prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
  {
+       /*
+        * The runqueue lock will be released by the next task
+        * (which is an invalid locking op, but in the case of
+        * the scheduler it's an obvious special-case), so we
+        * do an early lockdep release here:
+        */
+       rq_unpin_lock(rq, rf);
+       spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
  #ifdef CONFIG_DEBUG_SPINLOCK
         /* this is a valid case when another task releases the spinlock */
-       rq->lock.owner = current;
+       rq->lock.owner = next;
  #endif
+}
+
+static inline void finish_lock_switch(struct rq *rq)
+{
         /*
          * If we are tracking spinlock dependencies then we have to
          * fix up the runqueue lock - which gets 'carried over' from
          * prev into current:
          */
         spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
-
         raw_spin_unlock_irq(&rq->lock);
  }
  
@@ -2698,23 +2711,27 @@ static struct rq *finish_task_switch(struct task_struct *prev)
         prev_state = prev->state;
         vtime_task_switch(prev);
         perf_event_task_sched_in(prev, current);
-       /*
-        * The membarrier system call requires a full memory barrier
-        * after storing to rq->curr, before going back to user-space.
-        *
-        * TODO: This smp_mb__after_unlock_lock can go away if PPC end
-        * up adding a full barrier to switch_mm(), or we should figure
-        * out if a smp_mb__after_unlock_lock is really the proper API
-        * to use.
-        */
-       smp_mb__after_unlock_lock();
         finish_task(prev);
         finish_lock_switch(rq);
         finish_arch_post_lock_switch();
  
         fire_sched_in_preempt_notifiers(current);
-       if (mm)
+       /*
+        * When switching through a kernel thread, the loop in
+        * membarrier_{private,global}_expedited() may have observed that
+        * kernel thread and not issued an IPI. It is therefore possible to
+        * schedule between user->kernel->user threads without passing through
+        * switch_mm(). Membarrier requires a barrier after storing to
+        * rq->curr, before returning to userspace, so provide them here:
+        *
+        * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
+        *   provided by mmdrop(),
+        * - a sync_core for SYNC_CORE.
+        */
+       if (mm) {
+               membarrier_mm_sync_core_before_usermode(mm);
                 mmdrop(mm);
+       }
         if (unlikely(prev_state == TASK_DEAD)) {
                 if (prev->sched_class->task_dead)
                         prev->sched_class->task_dead(prev);
@@ -2818,6 +2835,13 @@ context_switch(struct rq *rq, struct task_struct *prev,
          */
         arch_start_context_switch(prev);
  
+       /*
+        * If mm is non-NULL, we pass through switch_mm(). If mm is
+        * NULL, we will pass through mmdrop() in finish_task_switch().
+        * Both of these contain the full memory barrier required by
+        * membarrier after storing to rq->curr, before returning to
+        * user-space.
+        */
         if (!mm) {
                 next->active_mm = oldmm;
                 mmgrab(oldmm);
@@ -2832,14 +2856,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
  
         rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
  
-       /*
-        * Since the runqueue lock will be released by the next
-        * task (which is an invalid locking op but in the case
-        * of the scheduler it's an obvious special-case), so we
-        * do an early lockdep release here:
-        */
-       rq_unpin_lock(rq, rf);
-       spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+       prepare_lock_switch(rq, next, rf);
  
         /* Here we just switch the register state and the stack. */
         switch_to(prev, next, prev);
@@ -3354,6 +3371,9 @@ static void __sched notrace __schedule(bool preempt)
          * Make sure that signal_pending_state()->signal_pending() below
          * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
          * done by the caller to avoid the race with signal_wake_up().
+        *
+        * The membarrier system call requires a full memory barrier
+        * after coming from user-space, before storing to rq->curr.
          */
         rq_lock(rq, &rf);
         smp_mb__after_spinlock();
@@ -3401,17 +3421,16 @@ static void __sched notrace __schedule(bool preempt)
                 /*
                  * The membarrier system call requires each architecture
                  * to have a full memory barrier after updating
-                * rq->curr, before returning to user-space. For TSO
-                * (e.g. x86), the architecture must provide its own
-                * barrier in switch_mm(). For weakly ordered machines
-                * for which spin_unlock() acts as a full memory
-                * barrier, finish_lock_switch() in common code takes
-                * care of this barrier. For weakly ordered machines for
-                * which spin_unlock() acts as a RELEASE barrier (only
-                * arm64 and PowerPC), arm64 has a full barrier in
-                * switch_to(), and PowerPC has
-                * smp_mb__after_unlock_lock() before
-                * finish_lock_switch().
+                * rq->curr, before returning to user-space.
+                *
+                * Here are the schemes providing that barrier on the
+                * various architectures:
+                * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+                *   switch_mm() relies on membarrier_arch_switch_mm() on PowerPC.
+                * - finish_lock_switch() for weakly-ordered
+                *   architectures where spin_unlock is a full barrier,
+                * - switch_to() for arm64 (weakly-ordered, spin_unlock
+                *   is a RELEASE barrier).
                  */
                 ++*switch_count;
  
@@ -4853,7 +4872,7 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
  
         ret = sched_getaffinity(pid, mask);
         if (ret == 0) {
-               size_t retlen = min_t(size_t, len, cpumask_size());
+               unsigned int retlen = min(len, cpumask_size());
  
                 if (copy_to_user(user_mask_ptr, mask, retlen))
                         ret = -EFAULT;