Merge branch 'sched/urgent' into sched/core
author Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:11:26 +0000 (14:11 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:23:57 +0000 (14:23 +0200)
Merge reason: avoid upcoming patch conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/sched.h
kernel/sched.c

diff --combined include/linux/sched.h
index c472414953bfc9d4133cadd48e19cca234e8be6e,3ab08e4bb6b87c608d8e58b3f37e4260c18c1f4e..2c35bc29d2a91a863daec532c34ea85f79ef24c8
@@@ -38,8 -38,6 +38,8 @@@
  #define SCHED_BATCH           3
  /* SCHED_ISO: reserved but not implemented yet */
  #define SCHED_IDLE            5
 +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 +#define SCHED_RESET_ON_FORK     0x40000000
  
  #ifdef __KERNEL__
  
@@@ -211,7 -209,7 +211,7 @@@ extern unsigned long long time_sync_thr
                        ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  #define task_contributes_to_load(task)        \
                                ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-                                (task->flags & PF_FROZEN) == 0)
+                                (task->flags & PF_FREEZING) == 0)
  
  #define __set_task_state(tsk, state_value)            \
        do { (tsk)->state = (state_value); } while (0)
@@@ -1231,10 -1229,6 +1231,10 @@@ struct task_struct 
        unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
 +
 +      /* Revert to default priority/policy when forking */
 +      unsigned sched_reset_on_fork:1;
 +
        pid_t pid;
        pid_t tgid;
  
@@@ -1686,6 -1680,7 +1686,7 @@@ extern cputime_t task_gtime(struct task
  #define PF_MEMALLOC   0x00000800      /* Allocating memory */
  #define PF_FLUSHER    0x00001000      /* responsible for disk writeback */
  #define PF_USED_MATH  0x00002000      /* if unset the fpu must be initialized before use */
+ #define PF_FREEZING   0x00004000      /* freeze in progress. do not account to load */
  #define PF_NOFREEZE   0x00008000      /* this thread should not be frozen */
  #define PF_FROZEN     0x00010000      /* frozen for system suspend */
  #define PF_FSTRANS    0x00020000      /* inside a filesystem transaction */
@@@ -2285,31 -2280,23 +2286,31 @@@ static inline int need_resched(void
   * cond_resched_softirq() will enable bhs before scheduling.
   */
  extern int _cond_resched(void);
 -#ifdef CONFIG_PREEMPT_BKL
 -static inline int cond_resched(void)
 -{
 -      return 0;
 -}
 +
 +#define cond_resched() ({                     \
 +      __might_sleep(__FILE__, __LINE__, 0);   \
 +      _cond_resched();                        \
 +})
 +
 +extern int __cond_resched_lock(spinlock_t *lock);
 +
 +#ifdef CONFIG_PREEMPT
 +#define PREEMPT_LOCK_OFFSET   PREEMPT_OFFSET
  #else
 -static inline int cond_resched(void)
 -{
 -      return _cond_resched();
 -}
 +#define PREEMPT_LOCK_OFFSET   0
  #endif
 -extern int cond_resched_lock(spinlock_t * lock);
 -extern int cond_resched_softirq(void);
 -static inline int cond_resched_bkl(void)
 -{
 -      return _cond_resched();
 -}
 +
 +#define cond_resched_lock(lock) ({                            \
 +      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 +      __cond_resched_lock(lock);                              \
 +})
 +
 +extern int __cond_resched_softirq(void);
 +
 +#define cond_resched_softirq() ({                             \
 +      __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET);      \
 +      __cond_resched_softirq();                               \
 +})
  
  /*
   * Does a critical section need to be broken due to another
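The new SCHED_RESET_ON_FORK bit is ORed into the policy argument of sched_setscheduler(). A minimal userspace sketch of the intended use (not part of the patch; the constant is defined locally in case the installed libc headers predate this change, and SCHED_FIFO needs CAP_SYS_NICE, e.g. root):

/*
 * Hypothetical demo: request SCHED_FIFO for the current task, but ask
 * the kernel to revert children to SCHED_NORMAL on fork.
 */
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef SCHED_RESET_ON_FORK
#define SCHED_RESET_ON_FORK	0x40000000	/* assumption: headers too old */
#endif

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };
	pid_t child;

	if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}

	/* With this patch the flag is reported back ORed into the policy. */
	printf("parent policy word: %#x\n", sched_getscheduler(0));

	child = fork();
	if (child == 0) {
		/* sched_fork() reverted the child to SCHED_NORMAL (0). */
		printf("child policy: %d\n", sched_getscheduler(0));
		_exit(0);
	}
	wait(NULL);
	return 0;
}
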
diff --combined kernel/sched.c
index 953f037dc053c3164231acec03bf2cd3b77b457f,1b59e265273b032d6aac2baec9b059646ca563bf..ce1056e9b02ac5d3cc091a517cc2abfc7a75eb5a
@@@ -693,7 -693,6 +693,7 @@@ static inline int cpu_of(struct rq *rq
  #define this_rq()             (&__get_cpu_var(runqueues))
  #define task_rq(p)            cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
 +#define raw_rq()              (&__raw_get_cpu_var(runqueues))
  
  inline void update_rq_clock(struct rq *rq)
  {
@@@ -2638,32 -2637,9 +2638,32 @@@ void sched_fork(struct task_struct *p, 
        set_task_cpu(p, cpu);
  
        /*
 -       * Make sure we do not leak PI boosting priority to the child:
 +       * Make sure we do not leak PI boosting priority to the child.
         */
        p->prio = current->normal_prio;
 +
 +      /*
 +       * Revert to default priority/policy on fork if requested.
 +       */
 +      if (unlikely(p->sched_reset_on_fork)) {
 +              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
 +                      p->policy = SCHED_NORMAL;
 +
 +              if (p->normal_prio < DEFAULT_PRIO)
 +                      p->prio = DEFAULT_PRIO;
 +
 +              if (PRIO_TO_NICE(p->static_prio) < 0) {
 +                      p->static_prio = NICE_TO_PRIO(0);
 +                      set_load_weight(p);
 +              }
 +
 +              /*
 +               * We don't need the reset flag anymore after the fork. It has
 +               * fulfilled its duty:
 +               */
 +              p->sched_reset_on_fork = 0;
 +      }
 +
        if (!rt_prio(p->prio))
                p->sched_class = &fair_sched_class;
  
@@@ -6147,25 -6123,17 +6147,25 @@@ static int __sched_setscheduler(struct 
        unsigned long flags;
        const struct sched_class *prev_class = p->sched_class;
        struct rq *rq;
 +      int reset_on_fork;
  
        /* may grab non-irq protected spin_locks */
        BUG_ON(in_interrupt());
  recheck:
        /* double check policy once rq lock held */
 -      if (policy < 0)
 +      if (policy < 0) {
 +              reset_on_fork = p->sched_reset_on_fork;
                policy = oldpolicy = p->policy;
 -      else if (policy != SCHED_FIFO && policy != SCHED_RR &&
 -                      policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 -                      policy != SCHED_IDLE)
 -              return -EINVAL;
 +      } else {
 +              reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
 +              policy &= ~SCHED_RESET_ON_FORK;
 +
 +              if (policy != SCHED_FIFO && policy != SCHED_RR &&
 +                              policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 +                              policy != SCHED_IDLE)
 +                      return -EINVAL;
 +      }
 +
        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are
         * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
                /* can't change other user's priorities */
                if (!check_same_owner(p))
                        return -EPERM;
 +
 +              /* Normal users shall not reset the sched_reset_on_fork flag */
 +              if (p->sched_reset_on_fork && !reset_on_fork)
 +                      return -EPERM;
        }
  
        if (user) {
        if (running)
                p->sched_class->put_prev_task(rq, p);
  
 +      p->sched_reset_on_fork = reset_on_fork;
 +
        oldprio = p->prio;
        __setscheduler(rq, p, policy, param->sched_priority);
  
@@@ -6374,15 -6336,14 +6374,15 @@@ SYSCALL_DEFINE1(sched_getscheduler, pid
        if (p) {
                retval = security_task_getscheduler(p);
                if (!retval)
 -                      retval = p->policy;
 +                      retval = p->policy
 +                              | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
        }
        read_unlock(&tasklist_lock);
        return retval;
  }
  
  /**
 - * sys_sched_getscheduler - get the RT priority of a thread
 + * sys_sched_getparam - get the RT priority of a thread
   * @pid: the pid in question.
   * @param: structure containing the RT priority.
   */
@@@ -6610,9 -6571,19 +6610,9 @@@ static inline int should_resched(void
  
  static void __cond_resched(void)
  {
 -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 -      __might_sleep(__FILE__, __LINE__);
 -#endif
 -      /*
 -       * The BKS might be reacquired before we have dropped
 -       * PREEMPT_ACTIVE, which could trigger a second
 -       * cond_resched() call.
 -       */
 -      do {
 -              add_preempt_count(PREEMPT_ACTIVE);
 -              schedule();
 -              sub_preempt_count(PREEMPT_ACTIVE);
 -      } while (need_resched());
 +      add_preempt_count(PREEMPT_ACTIVE);
 +      schedule();
 +      sub_preempt_count(PREEMPT_ACTIVE);
  }
  
  int __sched _cond_resched(void)
  EXPORT_SYMBOL(_cond_resched);
  
  /*
 - * cond_resched_lock() - if a reschedule is pending, drop the given lock,
 + * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
   * call schedule, and on return reacquire the lock.
   *
   * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
   * operations here to prevent schedule() from being called twice (once via
   * spin_unlock(), once by hand).
   */
 -int cond_resched_lock(spinlock_t *lock)
 +int __cond_resched_lock(spinlock_t *lock)
  {
        int resched = should_resched();
        int ret = 0;
        }
        return ret;
  }
 -EXPORT_SYMBOL(cond_resched_lock);
 +EXPORT_SYMBOL(__cond_resched_lock);
  
 -int __sched cond_resched_softirq(void)
 +int __sched __cond_resched_softirq(void)
  {
        BUG_ON(!in_softirq());
  
        }
        return 0;
  }
 -EXPORT_SYMBOL(cond_resched_softirq);
 +EXPORT_SYMBOL(__cond_resched_softirq);
  
  /**
   * yield - yield the current processor to other threads.
@@@ -6687,7 -6658,7 +6687,7 @@@ EXPORT_SYMBOL(yield)
   */
  void __sched io_schedule(void)
  {
 -      struct rq *rq = &__raw_get_cpu_var(runqueues);
 +      struct rq *rq = raw_rq();
  
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
@@@ -6699,7 -6670,7 +6699,7 @@@ EXPORT_SYMBOL(io_schedule)
  
  long __sched io_schedule_timeout(long timeout)
  {
 -      struct rq *rq = &__raw_get_cpu_var(runqueues);
 +      struct rq *rq = raw_rq();
        long ret;
  
        delayacct_blkio_start();
@@@ -7318,6 -7289,7 +7318,7 @@@ static void migrate_dead_tasks(unsigne
  static void calc_global_load_remove(struct rq *rq)
  {
        atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+       rq->calc_load_active = 0;
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  
@@@ -7544,6 -7516,7 +7545,7 @@@ migration_call(struct notifier_block *n
                task_rq_unlock(rq, &flags);
                get_task_struct(p);
                cpu_rq(cpu)->migration_thread = p;
+               rq->calc_load_update = calc_load_update;
                break;
  
        case CPU_ONLINE:
                /* Update our root-domain */
                rq = cpu_rq(cpu);
                spin_lock_irqsave(&rq->lock, flags);
-               rq->calc_load_update = calc_load_update;
-               rq->calc_load_active = 0;
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
  
@@@ -7654,7 -7625,7 +7654,7 @@@ static int __init migration_init(void
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
  
 -      return err;
 +      return 0;
  }
  early_initcall(migration_init);
  #endif
@@@ -9427,20 -9398,13 +9427,20 @@@ void __init sched_init(void
  }
  
  #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 -void __might_sleep(char *file, int line)
 +static inline int preempt_count_equals(int preempt_offset)
 +{
 +      int nested = preempt_count() & ~PREEMPT_ACTIVE;
 +
 +      return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 +}
 +
 +void __might_sleep(char *file, int line, int preempt_offset)
  {
  #ifdef in_atomic
        static unsigned long prev_jiffy;        /* ratelimiting */
  
 -      if ((!in_atomic() && !irqs_disabled()) ||
 -                  system_state != SYSTEM_RUNNING || oops_in_progress)
 +      if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
 +          system_state != SYSTEM_RUNNING || oops_in_progress)
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;
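
For reference, a hypothetical kernel-style sketch (not part of this patch) of how the reworked cond_resched_lock() is typically used; the function, list, and lock names here are made up for illustration:

/*
 * Drain a work list under a spinlock, yielding between items.  The
 * macro now calls __might_sleep(file, line, PREEMPT_LOCK_OFFSET), so
 * holding exactly this one lock does not trigger the debug warning.
 */
static void drain_list(struct list_head *head, spinlock_t *lock)
{
	spin_lock(lock);
	while (!list_empty(head)) {
		struct list_head *entry = head->next;

		list_del(entry);
		/* ... process the detached entry ... */

		/* May drop 'lock', schedule, and re-take it. */
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
}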