Merge branch 'sched/urgent' into sched/core
author Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:11:26 +0000 (14:11 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 2 Aug 2009 12:23:57 +0000 (14:23 +0200)
Merge reason: avoid upcoming patch conflict.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
fs/dcache.c
fs/locks.c
include/linux/kernel.h
include/linux/sched.h
kernel/sched.c

diff --git a/fs/dcache.c b/fs/dcache.c
index 9e5cd3c3a6ba76e8bc95aec9f4dc6c250fb7b2eb..a100fa35a48f3702fbe5522e2636501dc8eb93f3 100644
@@ -32,6 +32,7 @@
 #include <linux/swap.h>
 #include <linux/bootmem.h>
 #include <linux/fs_struct.h>
+#include <linux/hardirq.h>
 #include "internal.h"
 
 int sysctl_vfs_cache_pressure __read_mostly = 100;
diff --git a/fs/locks.c b/fs/locks.c
index b6440f52178fad125f3d0101d7a97f82ad2cf3ee..2eb81975c99c7ee2c247cf6c7aa11e348bb1e3fc 100644
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
         * give it the opportunity to lock the file.
         */
        if (found)
-               cond_resched_bkl();
+               cond_resched();
 
 find_conflict:
        for_each_lock(inode, before) {
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d6320a3e8def0fc22a9595d8ff03f52c08983404..2b5b1e0899a81e93a4970e8f01924958423c968c 100644
@@ -125,7 +125,7 @@ extern int _cond_resched(void);
 #endif
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-  void __might_sleep(char *file, int line);
+  void __might_sleep(char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@ -137,8 +137,9 @@ extern int _cond_resched(void);
  * supposed to.
  */
 # define might_sleep() \
-       do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
+       do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
 #else
+  static inline void __might_sleep(char *file, int line, int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
 #endif
 
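The kernel.h hunk above widens __might_sleep() to take a preempt_offset and adds an empty inline stub for the !CONFIG_DEBUG_SPINLOCK_SLEEP case, so the cond_resched() family in sched.h below can call it unconditionally. A standalone userspace sketch of the same call-site-capturing macro pattern (a mock, not kernel code; fake_preempt_count is a made-up stand-in for preempt_count()):

/*
 * Userspace mock of the might_sleep() pattern: the macro records the call
 * site (__FILE__/__LINE__) plus an allowed preempt offset, and the helper
 * warns when the current "preempt count" does not match that offset,
 * mirroring the preempt_count_equals() idea from this patch.
 */
#include <stdio.h>

static int fake_preempt_count;	/* stand-in for the kernel's preempt_count() */

static void __might_sleep(const char *file, int line, int preempt_offset)
{
	if (fake_preempt_count != preempt_offset)
		fprintf(stderr,
			"BUG: sleeping function called from invalid context at %s:%d\n",
			file, line);
}

#define might_sleep() \
	do { __might_sleep(__FILE__, __LINE__, 0); } while (0)

int main(void)
{
	might_sleep();			/* quiet: count matches offset 0 */
	fake_preempt_count = 1;		/* pretend we hold a spinlock */
	might_sleep();			/* warns, reporting this file and line */
	return 0;
}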
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3ab08e4bb6b87c608d8e58b3f37e4260c18c1f4e..2c35bc29d2a91a863daec532c34ea85f79ef24c8 100644
@@ -38,6 +38,8 @@
 #define SCHED_BATCH            3
 /* SCHED_ISO: reserved but not implemented yet */
 #define SCHED_IDLE             5
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK     0x40000000
 
 #ifdef __KERNEL__
 
@@ -1229,6 +1231,10 @@ struct task_struct {
        unsigned did_exec:1;
        unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                                 * execve */
+
+       /* Revert to default priority/policy when forking */
+       unsigned sched_reset_on_fork:1;
+
        pid_t pid;
        pid_t tgid;
 
@@ -2280,23 +2286,31 @@ static inline int need_resched(void)
  * cond_resched_softirq() will enable bhs before scheduling.
  */
 extern int _cond_resched(void);
-#ifdef CONFIG_PREEMPT_BKL
-static inline int cond_resched(void)
-{
-       return 0;
-}
+
+#define cond_resched() ({                      \
+       __might_sleep(__FILE__, __LINE__, 0);   \
+       _cond_resched();                        \
+})
+
+extern int __cond_resched_lock(spinlock_t *lock);
+
+#ifdef CONFIG_PREEMPT
+#define PREEMPT_LOCK_OFFSET    PREEMPT_OFFSET
 #else
-static inline int cond_resched(void)
-{
-       return _cond_resched();
-}
+#define PREEMPT_LOCK_OFFSET    0
 #endif
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-static inline int cond_resched_bkl(void)
-{
-       return _cond_resched();
-}
+
+#define cond_resched_lock(lock) ({                             \
+       __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \
+       __cond_resched_lock(lock);                              \
+})
+
+extern int __cond_resched_softirq(void);
+
+#define cond_resched_softirq() ({                              \
+       __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET);      \
+       __cond_resched_softirq();                               \
+})
 
 /*
  * Does a critical section need to be broken due to another
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e265273b032d6aac2baec9b059646ca563bf..ce1056e9b02ac5d3cc091a517cc2abfc7a75eb5a 100644
@@ -693,6 +693,7 @@ static inline int cpu_of(struct rq *rq)
 #define this_rq()              (&__get_cpu_var(runqueues))
 #define task_rq(p)             cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)          (cpu_rq(cpu)->curr)
+#define raw_rq()               (&__raw_get_cpu_var(runqueues))
 
 inline void update_rq_clock(struct rq *rq)
 {
@@ -2637,9 +2638,32 @@ void sched_fork(struct task_struct *p, int clone_flags)
        set_task_cpu(p, cpu);
 
        /*
-        * Make sure we do not leak PI boosting priority to the child:
+        * Make sure we do not leak PI boosting priority to the child.
         */
        p->prio = current->normal_prio;
+
+       /*
+        * Revert to default priority/policy on fork if requested.
+        */
+       if (unlikely(p->sched_reset_on_fork)) {
+               if (p->policy == SCHED_FIFO || p->policy == SCHED_RR)
+                       p->policy = SCHED_NORMAL;
+
+               if (p->normal_prio < DEFAULT_PRIO)
+                       p->prio = DEFAULT_PRIO;
+
+               if (PRIO_TO_NICE(p->static_prio) < 0) {
+                       p->static_prio = NICE_TO_PRIO(0);
+                       set_load_weight(p);
+               }
+
+               /*
+                * We don't need the reset flag anymore after the fork. It has
+                * fulfilled its duty:
+                */
+               p->sched_reset_on_fork = 0;
+       }
+
        if (!rt_prio(p->prio))
                p->sched_class = &fair_sched_class;
 
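The reset path above relies on the kernel's nice/priority mapping: nice -20..19 maps onto static_prio 100..139 and DEFAULT_PRIO is 120 (nice 0), so PRIO_TO_NICE(p->static_prio) < 0 catches tasks that were reniced below 0. A standalone sketch of that arithmetic; the macro values mirror include/linux/sched.h of this era and are reproduced here only for illustration:

/*
 * Sketch of the nice<->static_prio arithmetic used by the reset path.
 * MAX_RT_PRIO and the conversion macros mimic the kernel headers; they
 * are not part of this patch.
 */
#include <stdio.h>

#define MAX_RT_PRIO		100
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
#define DEFAULT_PRIO		NICE_TO_PRIO(0)		/* 120 */

int main(void)
{
	int static_prio = NICE_TO_PRIO(-5);	/* a "boosted" negative nice */

	printf("DEFAULT_PRIO = %d\n", DEFAULT_PRIO);		/* 120 */
	printf("nice -5 -> static_prio %d (back to nice %d)\n",
	       static_prio, PRIO_TO_NICE(static_prio));		/* 115, -5 */

	/* Same condition sched_fork() uses to drop a negative nice to 0. */
	if (PRIO_TO_NICE(static_prio) < 0)
		static_prio = NICE_TO_PRIO(0);
	printf("after reset: static_prio %d (nice %d)\n",
	       static_prio, PRIO_TO_NICE(static_prio));		/* 120, 0 */
	return 0;
}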
@@ -6123,17 +6147,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy,
        unsigned long flags;
        const struct sched_class *prev_class = p->sched_class;
        struct rq *rq;
+       int reset_on_fork;
 
        /* may grab non-irq protected spin_locks */
        BUG_ON(in_interrupt());
 recheck:
        /* double check policy once rq lock held */
-       if (policy < 0)
+       if (policy < 0) {
+               reset_on_fork = p->sched_reset_on_fork;
                policy = oldpolicy = p->policy;
-       else if (policy != SCHED_FIFO && policy != SCHED_RR &&
-                       policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-                       policy != SCHED_IDLE)
-               return -EINVAL;
+       } else {
+               reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
+               policy &= ~SCHED_RESET_ON_FORK;
+
+               if (policy != SCHED_FIFO && policy != SCHED_RR &&
+                               policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+                               policy != SCHED_IDLE)
+                       return -EINVAL;
+       }
+
        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are
         * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL,
@@ -6177,6 +6209,10 @@ recheck:
                /* can't change other user's priorities */
                if (!check_same_owner(p))
                        return -EPERM;
+
+               /* Normal users shall not reset the sched_reset_on_fork flag */
+               if (p->sched_reset_on_fork && !reset_on_fork)
+                       return -EPERM;
        }
 
        if (user) {
@@ -6220,6 +6256,8 @@ recheck:
        if (running)
                p->sched_class->put_prev_task(rq, p);
 
+       p->sched_reset_on_fork = reset_on_fork;
+
        oldprio = p->prio;
        __setscheduler(rq, p, policy, param->sched_priority);
 
@@ -6336,14 +6374,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
        if (p) {
                retval = security_task_getscheduler(p);
                if (!retval)
-                       retval = p->policy;
+                       retval = p->policy
+                               | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
        }
        read_unlock(&tasklist_lock);
        return retval;
 }
 
 /**
- * sys_sched_getscheduler - get the RT priority of a thread
+ * sys_sched_getparam - get the RT priority of a thread
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
  */
@@ -6571,19 +6610,9 @@ static inline int should_resched(void)
 
 static void __cond_resched(void)
 {
-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-       __might_sleep(__FILE__, __LINE__);
-#endif
-       /*
-        * The BKS might be reacquired before we have dropped
-        * PREEMPT_ACTIVE, which could trigger a second
-        * cond_resched() call.
-        */
-       do {
-               add_preempt_count(PREEMPT_ACTIVE);
-               schedule();
-               sub_preempt_count(PREEMPT_ACTIVE);
-       } while (need_resched());
+       add_preempt_count(PREEMPT_ACTIVE);
+       schedule();
+       sub_preempt_count(PREEMPT_ACTIVE);
 }
 
 int __sched _cond_resched(void)
@@ -6597,14 +6626,14 @@ int __sched _cond_resched(void)
 EXPORT_SYMBOL(_cond_resched);
 
 /*
- * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
  * call schedule, and on return reacquire the lock.
  *
  * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
  * operations here to prevent schedule() from being called twice (once via
  * spin_unlock(), once by hand).
  */
-int cond_resched_lock(spinlock_t *lock)
+int __cond_resched_lock(spinlock_t *lock)
 {
        int resched = should_resched();
        int ret = 0;
@@ -6620,9 +6649,9 @@ int cond_resched_lock(spinlock_t *lock)
        }
        return ret;
 }
-EXPORT_SYMBOL(cond_resched_lock);
+EXPORT_SYMBOL(__cond_resched_lock);
 
-int __sched cond_resched_softirq(void)
+int __sched __cond_resched_softirq(void)
 {
        BUG_ON(!in_softirq());
 
@@ -6634,7 +6663,7 @@ int __sched cond_resched_softirq(void)
        }
        return 0;
 }
-EXPORT_SYMBOL(cond_resched_softirq);
+EXPORT_SYMBOL(__cond_resched_softirq);
 
 /**
  * yield - yield the current processor to other threads.
@@ -6658,7 +6687,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-       struct rq *rq = &__raw_get_cpu_var(runqueues);
+       struct rq *rq = raw_rq();
 
        delayacct_blkio_start();
        atomic_inc(&rq->nr_iowait);
@@ -6670,7 +6699,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-       struct rq *rq = &__raw_get_cpu_var(runqueues);
+       struct rq *rq = raw_rq();
        long ret;
 
        delayacct_blkio_start();
@@ -7625,7 +7654,7 @@ static int __init migration_init(void)
        migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
 
-       return err;
+       return 0;
 }
 early_initcall(migration_init);
 #endif
@@ -9398,13 +9427,20 @@ void __init sched_init(void)
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-void __might_sleep(char *file, int line)
+static inline int preempt_count_equals(int preempt_offset)
+{
+       int nested = preempt_count() & ~PREEMPT_ACTIVE;
+
+       return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
+}
+
+void __might_sleep(char *file, int line, int preempt_offset)
 {
 #ifdef in_atomic
        static unsigned long prev_jiffy;        /* ratelimiting */
 
-       if ((!in_atomic() && !irqs_disabled()) ||
-                   system_state != SYSTEM_RUNNING || oops_in_progress)
+       if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
+           system_state != SYSTEM_RUNNING || oops_in_progress)
                return;
        if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
                return;