Merge branches 'timers/clocksource', 'timers/hrtimers', 'timers/nohz', 'timers/ntp...

[sfrench/cifs-2.6.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index cfb0d87b99fcafb8ff0eb753e0ea489dc20478dd..81c68fef4431746411d30c9a936cc8652c3d5b81 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  extern void arch_unmap_area(struct mm_struct *, unsigned long);
  extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
  /*
   * The mm counters are not protected by its page_table_lock,
   * so must be incremented atomically.
@@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
  #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
  
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else  /* !USE_SPLIT_PTLOCKS */
  /*
   * The mm counters are protected by its page_table_lock,
   * so can be incremented directly.
@@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  #define inc_mm_counter(mm, member) (mm)->_##member++
  #define dec_mm_counter(mm, member) (mm)->_##member--
  
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
  
  #define get_mm_rss(mm)                                 \
         (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
@@ -425,6 +425,39 @@ struct pacct_struct {
         unsigned long           ac_minflt, ac_majflt;
  };
  
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:             time spent in user mode, in &cputime_t units
+ * @stime:             time spent in kernel mode, in &cputime_t units
+ * @sum_exec_runtime:  total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are
+ * tracked for threads and thread groups.  Most code that deals with
+ * CPU time wants to keep these counts together and handle all three
+ * of them in parallel.
+ */
+struct task_cputime {
+       cputime_t utime;
+       cputime_t stime;
+       unsigned long long sum_exec_runtime;
+};
+/* Alternate field names when used to cache expirations. */
+#define prof_exp       stime
+#define virt_exp       utime
+#define sched_exp      sum_exec_runtime
+
+/**
+ * struct thread_group_cputime - thread group interval timer counts
+ * @totals:            per-cpu thread group CPU time totals; NULL until
+ *                     allocated, released with free_percpu().
+ *
+ * This structure contains the version of task_cputime, above, that is
+ * used for thread group CPU clock calculations.
+ */
+struct thread_group_cputime {
+       struct task_cputime *totals;
+};
+
  /*
   * NOTE! "signal_struct" does not have it's own
   * locking, because a shared signal_struct always
@@ -451,8 +484,8 @@ struct signal_struct {
          * - everyone except group_exit_task is stopped during signal delivery
          *   of fatal signals, group_exit_task processes the signal.
          */
-       struct task_struct      *group_exit_task;
         int                     notify_count;
+       struct task_struct      *group_exit_task;
  
         /* thread group stop support, overloads group_exit_code too */
         int                     group_stop_count;
@@ -470,6 +503,17 @@ struct signal_struct {
         cputime_t it_prof_expires, it_virt_expires;
         cputime_t it_prof_incr, it_virt_incr;
  
+       /*
+        * Thread group totals for process CPU clocks.
+        * See thread_group_cputime(), et al, for details.
+        */
+       struct thread_group_cputime cputime;
+
+       /* Earliest-expiration cache. */
+       struct task_cputime cputime_expires;
+
+       struct list_head cpu_timers[3];
+
         /* job control IDs */
  
         /*
@@ -500,7 +544,7 @@ struct signal_struct {
          * Live threads maintain their own counters and add to these
          * in __exit_signal, except for the group leader.
          */
-       cputime_t utime, stime, cutime, cstime;
+       cputime_t cutime, cstime;
         cputime_t gtime;
         cputime_t cgtime;
         unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -508,14 +552,6 @@ struct signal_struct {
         unsigned long inblock, oublock, cinblock, coublock;
         struct task_io_accounting ioac;
  
-       /*
-        * Cumulative ns of scheduled CPU time for dead threads in the
-        * group, not including a zombie group leader.  (This only differs
-        * from jiffies_to_ns(utime + stime) if sched_clock uses something
-        * other than jiffies.)
-        */
-       unsigned long long sum_sched_runtime;
-
         /*
          * We don't bother to synchronize most readers of this at all,
          * because there is no reader checking a limit that actually needs
@@ -527,8 +563,6 @@ struct signal_struct {
          */
         struct rlimit rlim[RLIM_NLIMITS];
  
-       struct list_head cpu_timers[3];
-
         /* keep the process-shared keyrings here so that they do the right
          * thing in threads created with CLONE_THREAD */
  #ifdef CONFIG_KEYS
@@ -824,6 +858,9 @@ struct sched_domain {
         unsigned int ttwu_move_affine;
         unsigned int ttwu_move_balance;
  #endif
+#ifdef CONFIG_SCHED_DEBUG
+       char *name;
+#endif
  };
  
  extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
@@ -897,7 +934,7 @@ struct sched_class {
         void (*yield_task) (struct rq *rq);
         int  (*select_task_rq)(struct task_struct *p, int sync);
  
-       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
+       void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync);
  
         struct task_struct * (*pick_next_task) (struct rq *rq);
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
@@ -1010,8 +1047,8 @@ struct sched_entity {
  
  struct sched_rt_entity {
         struct list_head run_list;
-       unsigned int time_slice;
         unsigned long timeout;
+       unsigned int time_slice;
         int nr_cpus_allowed;
  
         struct sched_rt_entity *back;
@@ -1134,8 +1171,7 @@ struct task_struct {
  /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
         unsigned long min_flt, maj_flt;
  
-       cputime_t it_prof_expires, it_virt_expires;
-       unsigned long long it_sched_expires;
+       struct task_cputime cputime_expires;
         struct list_head cpu_timers[3];
  
  /* process credentials */
@@ -1475,6 +1511,10 @@ static inline void put_task_struct(struct task_struct *t)
                 __put_task_struct(t);
  }
  
+extern cputime_t task_utime(struct task_struct *p);
+extern cputime_t task_stime(struct task_struct *p);
+extern cputime_t task_gtime(struct task_struct *p);
+
  /*
   * Per process flags
   */
@@ -1581,6 +1621,7 @@ extern unsigned long long cpu_clock(int cpu);
  
  extern unsigned long long
  task_sched_runtime(struct task_struct *task);
+extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
  
  /* sched_exec is called by processes performing an exec */
  #ifdef CONFIG_SMP
@@ -2077,6 +2118,30 @@ static inline int spin_needbreak(spinlock_t *lock)
  #endif
  }
  
+/*
+ * Thread group CPU time accounting.
+ */
+
+extern int thread_group_cputime_alloc(struct task_struct *);
+extern void thread_group_cputime(struct task_struct *, struct task_cputime *);
+
+static inline void thread_group_cputime_init(struct signal_struct *sig)
+{
+       sig->cputime.totals = NULL;
+}
+
+static inline int thread_group_cputime_clone_thread(struct task_struct *curr)
+{
+       if (curr->signal->cputime.totals)
+               return 0;
+       return thread_group_cputime_alloc(curr);
+}
+
+static inline void thread_group_cputime_free(struct signal_struct *sig)
+{
+       free_percpu(sig->cputime.totals);
+}
+
  /*
   * Reevaluate whether the task has signals pending delivery.
   * Wake the task if so.