Merge tag 'iommu-updates-v6.8' of git://git.kernel.org/pub/scm/linux/kernel/git/joro...

[sfrench/cifs-2.6.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 70888a36677b60ce7205e877d68076d65c14b183..cdb8ea53c365ba45be4041c887de4c9d1c22afcd 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -10,33 +10,41 @@
  #include <uapi/linux/sched.h>
  
  #include <asm/current.h>
-
-#include <linux/pid.h>
-#include <linux/sem.h>
+#include <asm/processor.h>
+#include <linux/thread_info.h>
+#include <linux/preempt.h>
+#include <linux/cpumask.h>
+
+#include <linux/cache.h>
+#include <linux/irqflags_types.h>
+#include <linux/smp_types.h>
+#include <linux/pid_types.h>
+#include <linux/sem_types.h>
  #include <linux/shm.h>
  #include <linux/kmsan_types.h>
-#include <linux/mutex.h>
-#include <linux/plist.h>
-#include <linux/hrtimer.h>
-#include <linux/irqflags.h>
-#include <linux/seccomp.h>
-#include <linux/nodemask.h>
-#include <linux/rcupdate.h>
-#include <linux/refcount.h>
+#include <linux/mutex_types.h>
+#include <linux/plist_types.h>
+#include <linux/hrtimer_types.h>
+#include <linux/timer_types.h>
+#include <linux/seccomp_types.h>
+#include <linux/nodemask_types.h>
+#include <linux/refcount_types.h>
  #include <linux/resource.h>
  #include <linux/latencytop.h>
  #include <linux/sched/prio.h>
  #include <linux/sched/types.h>
  #include <linux/signal_types.h>
-#include <linux/syscall_user_dispatch.h>
+#include <linux/syscall_user_dispatch_types.h>
  #include <linux/mm_types_task.h>
  #include <linux/task_io_accounting.h>
-#include <linux/posix-timers.h>
-#include <linux/rseq.h>
-#include <linux/seqlock.h>
+#include <linux/posix-timers_types.h>
+#include <linux/restart_block.h>
+#include <uapi/linux/rseq.h>
+#include <linux/seqlock_types.h>
  #include <linux/kcsan.h>
  #include <linux/rv.h>
  #include <linux/livepatch_sched.h>
+#include <linux/uidgid_types.h>
  #include <asm/kmap_size.h>
  
  /* task_struct member predeclarations (sorted alphabetically): */
@@ -63,11 +71,13 @@ struct robust_list_head;
  struct root_domain;
  struct rq;
  struct sched_attr;
+struct sched_dl_entity;
  struct seq_file;
  struct sighand_struct;
  struct signal_struct;
  struct task_delay_info;
  struct task_group;
+struct task_struct;
  struct user_event_mm;
  
  /*
@@ -413,42 +423,6 @@ struct load_weight {
         u32                             inv_weight;
  };
  
-/**
- * struct util_est - Estimation utilization of FAIR tasks
- * @enqueued: instantaneous estimated utilization of a task/cpu
- * @ewma:     the Exponential Weighted Moving Average (EWMA)
- *            utilization of a task
- *
- * Support data structure to track an Exponential Weighted Moving Average
- * (EWMA) of a FAIR task's utilization. New samples are added to the moving
- * average each time a task completes an activation. Sample's weight is chosen
- * so that the EWMA will be relatively insensitive to transient changes to the
- * task's workload.
- *
- * The enqueued attribute has a slightly different meaning for tasks and cpus:
- * - task:   the task's util_avg at last task dequeue time
- * - cfs_rq: the sum of util_est.enqueued for each RUNNABLE task on that CPU
- * Thus, the util_est.enqueued of a task represents the contribution on the
- * estimated utilization of the CPU where that task is currently enqueued.
- *
- * Only for tasks we track a moving average of the past instantaneous
- * estimated utilization. This allows to absorb sporadic drops in utilization
- * of an otherwise almost periodic task.
- *
- * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
- * updates. When a task is dequeued, its util_est should not be updated if its
- * util_avg has not been updated in the meantime.
- * This information is mapped into the MSB bit of util_est.enqueued at dequeue
- * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
- * for a task) it is safe to use MSB.
- */
-struct util_est {
-       unsigned int                    enqueued;
-       unsigned int                    ewma;
-#define UTIL_EST_WEIGHT_SHIFT          2
-#define UTIL_AVG_UNCHANGED             0x80000000
-} __attribute__((__aligned__(sizeof(u64))));
-
  /*
   * The load/runnable/util_avg accumulates an infinite geometric series
   * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
@@ -503,9 +477,20 @@ struct sched_avg {
         unsigned long                   load_avg;
         unsigned long                   runnable_avg;
         unsigned long                   util_avg;
-       struct util_est                 util_est;
+       unsigned int                    util_est;
  } ____cacheline_aligned;
  
+/*
+ * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
+ * updates. When a task is dequeued, its util_est should not be updated if its
+ * util_avg has not been updated in the meantime.
+ * This information is mapped into the MSB bit of util_est at dequeue time.
+ * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
+ * it is safe to use MSB.
+ */
+#define UTIL_EST_WEIGHT_SHIFT          2
+#define UTIL_AVG_UNCHANGED             0x80000000
+
  struct sched_statistics {
  #ifdef CONFIG_SCHEDSTATS
         u64                             wait_start;
@@ -523,7 +508,7 @@ struct sched_statistics {
         u64                             block_max;
         s64                             sum_block_runtime;
  
-       u64                             exec_max;
+       s64                             exec_max;
         u64                             slice_max;
  
         u64                             nr_migrations_cold;
@@ -553,7 +538,7 @@ struct sched_entity {
         struct load_weight              load;
         struct rb_node                  run_node;
         u64                             deadline;
-       u64                             min_deadline;
+       u64                             min_vruntime;
  
         struct list_head                group_node;
         unsigned int                    on_rq;
@@ -607,6 +592,9 @@ struct sched_rt_entity {
  #endif
  } __randomize_layout;
  
+typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
+typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
+
  struct sched_dl_entity {
         struct rb_node                  rb_node;
  
@@ -654,6 +642,7 @@ struct sched_dl_entity {
         unsigned int                    dl_yielded        : 1;
         unsigned int                    dl_non_contending : 1;
         unsigned int                    dl_overrun        : 1;
+       unsigned int                    dl_server         : 1;
  
         /*
          * Bandwidth enforcement timer. Each -deadline task has its
@@ -668,7 +657,20 @@ struct sched_dl_entity {
          * timer is needed to decrease the active utilization at the correct
          * time.
          */
-       struct hrtimer inactive_timer;
+       struct hrtimer                  inactive_timer;
+
+       /*
+        * Bits for DL-server functionality. Also see the comment near
+        * dl_server_update().
+        *
+        * @rq the runqueue this server is for
+        *
+        * @server_has_tasks() returns true if @server_pick() returns a
+        * runnable task.
+        */
+       struct rq                       *rq;
+       dl_server_has_tasks_f           server_has_tasks;
+       dl_server_pick_f                server_pick;
  
  #ifdef CONFIG_RT_MUTEXES
         /*
@@ -795,6 +797,7 @@ struct task_struct {
         struct sched_entity             se;
         struct sched_rt_entity          rt;
         struct sched_dl_entity          dl;
+       struct sched_dl_entity          *dl_server;
         const struct sched_class        *sched_class;
  
  #ifdef CONFIG_SCHED_CORE
@@ -1561,114 +1564,6 @@ struct task_struct {
          */
  };
  
-static inline struct pid *task_pid(struct task_struct *task)
-{
-       return task->thread_pid;
-}
-
-/*
- * the helpers to get the task's different pids as they are seen
- * from various namespaces
- *
- * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
- * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
- *                     current.
- * task_xid_nr_ns()  : id seen from the ns specified;
- *
- * see also pid_nr() etc in include/linux/pid.h
- */
-pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns);
-
-static inline pid_t task_pid_nr(struct task_struct *tsk)
-{
-       return tsk->pid;
-}
-
-static inline pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
-}
-
-static inline pid_t task_pid_vnr(struct task_struct *tsk)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
-}
-
-
-static inline pid_t task_tgid_nr(struct task_struct *tsk)
-{
-       return tsk->tgid;
-}
-
-/**
- * pid_alive - check that a task structure is not stale
- * @p: Task structure to be checked.
- *
- * Test if a process is not yet dead (at most zombie state)
- * If pid_alive fails, then pointers within the task structure
- * can be stale and must not be dereferenced.
- *
- * Return: 1 if the process is alive. 0 otherwise.
- */
-static inline int pid_alive(const struct task_struct *p)
-{
-       return p->thread_pid != NULL;
-}
-
-static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
-}
-
-static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
-}
-
-
-static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
-}
-
-static inline pid_t task_session_vnr(struct task_struct *tsk)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
-}
-
-static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
-}
-
-static inline pid_t task_tgid_vnr(struct task_struct *tsk)
-{
-       return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL);
-}
-
-static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
-{
-       pid_t pid = 0;
-
-       rcu_read_lock();
-       if (pid_alive(tsk))
-               pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
-       rcu_read_unlock();
-
-       return pid;
-}
-
-static inline pid_t task_ppid_nr(const struct task_struct *tsk)
-{
-       return task_ppid_nr_ns(tsk, &init_pid_ns);
-}
-
-/* Obsolete, do not use: */
-static inline pid_t task_pgrp_nr(struct task_struct *tsk)
-{
-       return task_pgrp_nr_ns(tsk, &init_pid_ns);
-}
-
  #define TASK_REPORT_IDLE       (TASK_REPORT + 1)
  #define TASK_REPORT_MAX                (TASK_REPORT_IDLE << 1)
  
@@ -1712,20 +1607,6 @@ static inline char task_state_to_char(struct task_struct *tsk)
         return task_index_to_char(task_state_index(tsk));
  }
  
-/**
- * is_global_init - check if a task structure is init. Since init
- * is free to have sub-threads we need to check tgid.
- * @tsk: Task structure to be checked.
- *
- * Check if a task structure is the first user space task the kernel created.
- *
- * Return: 1 if the task structure is init. 0 otherwise.
- */
-static inline int is_global_init(struct task_struct *tsk)
-{
-       return task_tgid_nr(tsk) == 1;
-}
-
  extern struct pid *cad_pid;
  
  /*
@@ -1955,9 +1836,7 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p);
  void yield(void);
  
  union thread_union {
-#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK
         struct task_struct task;
-#endif
  #ifndef CONFIG_THREAD_INFO_IN_TASK
         struct thread_info thread_info;
  #endif
@@ -2177,15 +2056,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
         __cond_resched_rwlock_write(lock);                                      \
  })
  
-static inline void cond_resched_rcu(void)
-{
-#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
-       rcu_read_unlock();
-       cond_resched();
-       rcu_read_lock();
-#endif
-}
-
  #ifdef CONFIG_PREEMPT_DYNAMIC
  
  extern bool preempt_model_none(void);
@@ -2227,37 +2097,6 @@ static inline bool preempt_model_preemptible(void)
         return preempt_model_full() || preempt_model_rt();
  }
  
-/*
- * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
- * but a general need for low latency)
- */
-static inline int spin_needbreak(spinlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
-       return spin_is_contended(lock);
-#else
-       return 0;
-#endif
-}
-
-/*
- * Check if a rwlock is contended.
- * Returns non-zero if there is another task waiting on the rwlock.
- * Returns zero if the lock is not contended or the system / underlying
- * rwlock implementation does not support contention detection.
- * Technically does not depend on CONFIG_PREEMPTION, but a general need
- * for low latency.
- */
-static inline int rwlock_needbreak(rwlock_t *lock)
-{
-#ifdef CONFIG_PREEMPTION
-       return rwlock_is_contended(lock);
-#else
-       return 0;
-#endif
-}
-
  static __always_inline bool need_resched(void)
  {
         return unlikely(tif_need_resched());
@@ -2292,6 +2131,8 @@ extern bool sched_task_on_rq(struct task_struct *p);
  extern unsigned long get_wchan(struct task_struct *p);
  extern struct task_struct *cpu_curr_snapshot(int cpu);
  
+#include <linux/spinlock.h>
+
  /*
   * In order to reduce various lock holder preemption latencies provide an
   * interface to see if a vCPU is currently running or not.
@@ -2328,129 +2169,6 @@ static inline bool owner_on_cpu(struct task_struct *owner)
  unsigned long sched_cpu_util(int cpu);
  #endif /* CONFIG_SMP */
  
-#ifdef CONFIG_RSEQ
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
-       RSEQ_EVENT_PREEMPT_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
-       RSEQ_EVENT_SIGNAL_BIT   = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
-       RSEQ_EVENT_MIGRATE_BIT  = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
-       RSEQ_EVENT_PREEMPT      = (1U << RSEQ_EVENT_PREEMPT_BIT),
-       RSEQ_EVENT_SIGNAL       = (1U << RSEQ_EVENT_SIGNAL_BIT),
-       RSEQ_EVENT_MIGRATE      = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-       if (t->rseq)
-               set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
-}
-
-void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
-
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
-                                            struct pt_regs *regs)
-{
-       if (current->rseq)
-               __rseq_handle_notify_resume(ksig, regs);
-}
-
-static inline void rseq_signal_deliver(struct ksignal *ksig,
-                                      struct pt_regs *regs)
-{
-       preempt_disable();
-       __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
-       preempt_enable();
-       rseq_handle_notify_resume(ksig, regs);
-}
-
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
-{
-       __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
-       rseq_set_notify_resume(t);
-}
-
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
-{
-       __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
-       rseq_set_notify_resume(t);
-}
-
-/*
- * If parent process has a registered restartable sequences area, the
- * child inherits. Unregister rseq for a clone with CLONE_VM set.
- */
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
-{
-       if (clone_flags & CLONE_VM) {
-               t->rseq = NULL;
-               t->rseq_len = 0;
-               t->rseq_sig = 0;
-               t->rseq_event_mask = 0;
-       } else {
-               t->rseq = current->rseq;
-               t->rseq_len = current->rseq_len;
-               t->rseq_sig = current->rseq_sig;
-               t->rseq_event_mask = current->rseq_event_mask;
-       }
-}
-
-static inline void rseq_execve(struct task_struct *t)
-{
-       t->rseq = NULL;
-       t->rseq_len = 0;
-       t->rseq_sig = 0;
-       t->rseq_event_mask = 0;
-}
-
-#else
-
-static inline void rseq_set_notify_resume(struct task_struct *t)
-{
-}
-static inline void rseq_handle_notify_resume(struct ksignal *ksig,
-                                            struct pt_regs *regs)
-{
-}
-static inline void rseq_signal_deliver(struct ksignal *ksig,
-                                      struct pt_regs *regs)
-{
-}
-static inline void rseq_preempt(struct task_struct *t)
-{
-}
-static inline void rseq_migrate(struct task_struct *t)
-{
-}
-static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
-{
-}
-static inline void rseq_execve(struct task_struct *t)
-{
-}
-
-#endif
-
-#ifdef CONFIG_DEBUG_RSEQ
-
-void rseq_syscall(struct pt_regs *regs);
-
-#else
-
-static inline void rseq_syscall(struct pt_regs *regs)
-{
-}
-
-#endif
-
  #ifdef CONFIG_SCHED_CORE
  extern void sched_core_free(struct task_struct *tsk);
  extern void sched_core_fork(struct task_struct *p);