Merge branch 'sched/urgent' into sched/core
author Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:11:26 +0000 (14:11 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:23:57 +0000 (14:23 +0200)
Merge reason: avoid upcoming patch conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
include/linux/sched.h
kernel/sched.c

diff --combined include/linux/sched.h
index c472414953bfc9d4133cadd48e19cca234e8be6e,3ab08e4bb6b87c608d8e58b3f37e4260c18c1f4e..2c35bc29d2a91a863daec532c34ea85f79ef24c8
@@@ -38,8 -38,6 +38,8 @@@
  #define SCHED_BATCH           3
  /* SCHED_ISO: reserved but not implemented yet */
  #define SCHED_IDLE            5
 +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
 +#define SCHED_RESET_ON_FORK     0x40000000
  
  #ifdef __KERNEL__
  
@@@ -211,7 -209,7 +211,7 @@@ extern unsigned long long time_sync_thr
                        ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  #define task_contributes_to_load(task)        \
                                ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-                                (task->flags & PF_FROZEN) == 0)
+                                (task->flags & PF_FREEZING) == 0)
  
  #define __set_task_state(tsk, state_value)            \
        do { (tsk)->state = (state_value); } while (0)
@@@ -1231,10 -1229,6 +1231,10 @@@ struct task_struct 
        unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
 +
 +      /* Revert to default priority/policy when forking */
 +      unsigned sched_reset_on_fork:1;
 +
        pid_t pid;
        pid_t tgid;
  
@@@ -1686,6 -1680,7 +1686,7 @@@ extern cputime_t task_gtime(struct task
  #define PF_MEMALLOC   0x00000800      /* Allocating memory */
  #define PF_FLUSHER    0x00001000      /* responsible for disk writeback */
  #define PF_USED_MATH  0x00002000      /* if unset the fpu must be initialized before use */
+ #define PF_FREEZING   0x00004000      /* freeze in progress. do not account to load */
  #define PF_NOFREEZE   0x00008000      /* this thread should not be frozen */
  #define PF_FROZEN     0x00010000      /* frozen for system suspend */
  #define PF_FSTRANS    0x00020000      /* inside a filesystem transaction */
@@@ -2285,31 -2280,23 +2286,31 @@@ static inline int need_resched(void
   * cond_resched_softirq() will enable bhs before scheduling.
   */
  extern int _cond_resched(void);
 -#ifdef CONFIG_PREEMPT_BKL
 -static inline int cond_resched(void)
 -{
 -      return 0;
 -}
 +
 +#define cond_resched() ({                     \
 +      __might_sleep(__FILE__, __LINE__, 0);   \
 +      _cond_resched();                        \
 +})
 +
 +extern int __cond_resched_lock(spinlock_t *lock);
 +
 +#ifdef CONFIG_PREEMPT
 +#define PREEMPT_LOCK_OFFSET   PREEMPT_OFFSET
  #else
 -static inline int cond_resched(void)
 -{
 -      return _cond_resched();
 -}
 +#define PREEMPT_LOCK_OFFSET   0
  #endif
 -extern int cond_resched_lock(spinlock_t * lock);
 -extern int cond_resched_softirq(void);
 -static inline int cond_resched_bkl(void)
 -{
 -      return _cond_resched();
 -}
 +
 +#define cond_resched_lock(lock) ({                            \
 +      __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
 +      __cond_resched_lock(lock);                              \
 +})
 +
 +extern int __cond_resched_softirq(void);
 +
 +#define cond_resched_softirq() ({                             \
 +      __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET);      \
 +      __cond_resched_softirq();                               \
 +})
  
  /*
   * Does a critical section need to be broken due to another
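The new SCHED_RESET_ON_FORK bit is ORed into the policy argument of sched_setscheduler(). A minimal userspace sketch of the intended use (not part of the patch; the constant is defined locally in case the installed libc headers predate this change, and SCHED_FIFO needs CAP_SYS_NICE, e.g. root):

/*
 * Hypothetical demo: request SCHED_FIFO for the current task, but ask
 * the kernel to revert children to SCHED_NORMAL on fork.
 */
#include <sched.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef SCHED_RESET_ON_FORK
#define SCHED_RESET_ON_FORK	0x40000000	/* assumption: headers too old */
#endif

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };
	pid_t child;

	if (sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}

	/* With this patch the flag is reported back ORed into the policy. */
	printf("parent policy word: %#x\n", sched_getscheduler(0));

	child = fork();
	if (child == 0) {
		/* sched_fork() reverted the child to SCHED_NORMAL (0). */
		printf("child policy: %d\n", sched_getscheduler(0));
		_exit(0);
	}
	wait(NULL);
	return 0;
}
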
diff --combined kernel/sched.c
index 953f037dc053c3164231acec03bf2cd3b77b457f,1b59e265273b032d6aac2baec9b059646ca563bf..ce1056e9b02ac5d3cc091a517cc2abfc7a75eb5a
@@@ -693,7 -693,6 +693,7 @@@ static inline int cpu_of(struct rq *rq
  #define this_rq()             (&__get_cpu_var(runqueues))
  #define task_rq(p)            cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
 +#define raw_rq()              (&__raw_get_cpu_var(runqueues))
  
  inline void update_rq_clock(struct rq *rq)
  {
@@@ -2638,32 -2637,9 +2638,32 @@@ void sched_fork(struct task_struct *p, 
        set_task_cpu(p, cpu);
  
        /*
 -       * Make sure we do not leak PI boosting priority to the child:
 +       * Make sure we do not leak PI boosting priority to the child.
         */
        p->prio = current->normal_prio;
 +
 +      /*
 +       * Revert to default priority/policy on fork if requested.
 +       */
 +      if (unlikely(p->sched_reset_on_fork)) {
 +              if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
 +                      p->policy = SCHED_NORMAL;
 +
 +              if (p->normal_prio < DEFAULT_PRIO)
 +                      p->prio = DEFAULT_PRIO;
 +
 +              if (PRIO_TO_NICE(p->static_prio) < 0) {
 +                      p->static_prio = NICE_TO_PRIO(0);
 +                      set_load_weight(p);
 +              }
 +
 +              /*
 +               * We don't need the reset flag anymore after the fork. It has
 +               * fulfilled its duty:
 +               */
 +              p->sched_reset_on_fork = 0;
 +      }
 +
        if (!rt_prio(p->prio))
                p->sched_class = &fair_sched_class;
  
@@@ -6147,25 -6123,17 +6147,25 @@@ static int __sched_setscheduler(struct 
        unsigned long flags;
        const struct sched_class *prev_class = p->sched_class;
        struct rq *rq;
 +      int reset_on_fork;
  
        /* may grab non-irq protected spin_locks */
        BUG_ON(in_interrupt());
  recheck:
        /* double check policy once rq lock held */
 -      if (policy < 0)
 +      if (policy < 0) {
 +              reset_on_fork = p->sched_reset_on_fork;
                policy = oldpolicy = p->policy;
 -      else if (policy != SCHED_FIFO && policy != SCHED_RR &&
 -                      policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 -                      policy != SCHED_IDLE)
 -              return -EINVAL;
 +      } else {
 +              reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
 +              policy &= ~SCHED_RESET_ON_FORK;
 +
 +              if (policy != SCHED_FIFO && policy != SCHED_RR &&
 +                              policy != SCHED_NORMAL && policy != SCHED_BATCH &&
 +                              policy != SCHED_IDLE)
 +                      return -EINVAL;
 +      }
 +
        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are
         * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
                /* can't change other user's priorities */
                if (!check_same_owner(p))
                        return -EPERM;
 +
 +              /* Normal users shall not reset the sched_reset_on_fork flag */
 +              if (p->sched_reset_on_fork && !reset_on_fork)
 +                      return -EPERM;
        }
  
        if (user) {
        if (running)
                p->sched_class->put_prev_task(rq, p);
  
 +      p->sched_reset_on_fork = reset_on_fork;
 +
        oldprio = p->prio;
        __setscheduler(rq, p, policy, param->sched_priority);
  
@@@ -6374,15 -6336,14 +6374,15 @@@ SYSCALL_DEFINE1(sched_getscheduler, pid
        if (p) {
                retval = security_task_getscheduler(p);
                if (!retval)
 -                      retval = p->policy;
 +                      retval = p->policy
 +                              | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
        }
        read_unlock(&tasklist_lock);
        return retval;
  }
  
  /**
 - * sys_sched_getscheduler - get the RT priority of a thread
 + * sys_sched_getparam - get the RT priority of a thread
   * @pid: the pid in question.
   * @param: structure containing the RT priority.
   */
@@@ -6610,9 -6571,19 +6610,9 @@@ static inline int should_resched(void
  
  static void __cond_resched(void)
  {
 -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 -      __might_sleep(__FILE__, __LINE__);
 -#endif
 -      /*
 -       * The BKS might be reacquired before we have dropped
 -       * PREEMPT_ACTIVE, which could trigger a second
 -       * cond_resched() call.
 -       */
 -      do {
 -              add_preempt_count(PREEMPT_ACTIVE);
 -              schedule();
 -              sub_preempt_count(PREEMPT_ACTIVE);
 -      } while (need_resched());
 +      add_preempt_count(PREEMPT_ACTIVE);
 +      schedule();
 +      sub_preempt_count(PREEMPT_ACTIVE);
  }
  
  int __sched _cond_resched(void)
  EXPORT_SYMBOL(_cond_resched);
  
  /*
 - * cond_resched_lock() - if a reschedule is pending, drop the given lock,
 + * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
   * call schedule, and on return reacquire the lock.
   *
   * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
   * operations here to prevent schedule() from being called twice (once via
   * spin_unlock(), once by hand).
   */
 -int cond_resched_lock(spinlock_t *lock)
 +int __cond_resched_lock(spinlock_t *lock)
  {
        int resched = should_resched();
        int ret = 0;
        }
        return ret;
  }
 -EXPORT_SYMBOL(cond_resched_lock);
 +EXPORT_SYMBOL(__cond_resched_lock);
  
 -int __sched cond_resched_softirq(void)
 +int __sched __cond_resched_softirq(void)
  {
        BUG_ON(!in_softirq());
  
        }
        return 0;
  }
 -EXPORT_SYMBOL(cond_resched_softirq);
 +EXPORT_SYMBOL(__cond_resched_softirq);
  
  /**
   * yield - yield the current processor to other threads.
@@@ -6687,7 -6658,7 +6687,7 @@@ EXPORT_SYMBOL(yield)
   */
  void __sched io_schedule(void)
  {
 -      struct rq *rq = &__raw_get_cpu_var(runqueues);
 +      struct rq *rq = raw_rq();
  
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
@@@ -6699,7 -6670,7 +6699,7 @@@ EXPORT_SYMBOL(io_schedule)
  
  long __sched io_schedule_timeout(long timeout)
  {
 -      struct rq *rq = &__raw_get_cpu_var(runqueues);
 +      struct rq *rq = raw_rq();
        long ret;
  
        delayacct_blkio_start();
@@@ -7318,6 -7289,7 +7318,7 @@@ static void migrate_dead_tasks(unsigne
  static void calc_global_load_remove(struct rq *rq)
  {
        atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+       rq->calc_load_active = 0;
  }
  #endif /* CONFIG_HOTPLUG_CPU */
  
@@@ -7544,6 -7516,7 +7545,7 @@@ migration_call(struct notifier_block *n
                task_rq_unlock(rq, &flags);
                get_task_struct(p);
                cpu_rq(cpu)->migration_thread = p;
+               rq->calc_load_update = calc_load_update;
                break;
  
        case CPU_ONLINE:
                /* Update our root-domain */
                rq = cpu_rq(cpu);
                spin_lock_irqsave(&rq->lock, flags);
-               rq->calc_load_update = calc_load_update;
-               rq->calc_load_active = 0;
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
  
@@@ -7654,7 -7625,7 +7654,7 @@@ static int __init migration_init(void
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
  
 -      return err;
 +      return 0;
  }
  early_initcall(migration_init);
  #endif
@@@ -9427,20 -9398,13 +9427,20 @@@ void __init sched_init(void
  }
  
  #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 -void __might_sleep(char *file, int line)
 +static inline int preempt_count_equals(int preempt_offset)
 +{
 +      int nested = preempt_count() & ~PREEMPT_ACTIVE;
 +
 +      return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 +}
 +
 +void __might_sleep(char *file, int line, int preempt_offset)
  {
  #ifdef in_atomic
        static unsigned long prev_jiffy;        /* ratelimiting */
  
 -      if ((!in_atomic() && !irqs_disabled()) ||
 -                  system_state != SYSTEM_RUNNING || oops_in_progress)
 +      if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
 +          system_state != SYSTEM_RUNNING || oops_in_progress)
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;
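
For reference, a hypothetical kernel-style sketch (not part of this patch) of how the reworked cond_resched_lock() is typically used; the function, list, and lock names here are made up for illustration:

/*
 * Drain a work list under a spinlock, yielding between items.  The
 * macro now calls __might_sleep(file, line, PREEMPT_LOCK_OFFSET), so
 * holding exactly this one lock does not trigger the debug warning.
 */
static void drain_list(struct list_head *head, spinlock_t *lock)
{
	spin_lock(lock);
	while (!list_empty(head)) {
		struct list_head *entry = head->next;

		list_del(entry);
		/* ... process the detached entry ... */

		/* May drop 'lock', schedule, and re-take it. */
		cond_resched_lock(lock);
	}
	spin_unlock(lock);
}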