diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2473736c7616dd3810b5295a8a5fe4bc4c16b92c..9bb0e0c412ec6617c5ef6cfa0b4f703a6a536a23 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -78,7 +78,7 @@ static inline int dl_bw_cpus(int i)
 #endif
 
 static inline
-void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
        u64 old = dl_rq->running_bw;
 
@@ -86,10 +86,12 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
        dl_rq->running_bw += dl_bw;
        SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
        SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
+       /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
 }
 
 static inline
-void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
        u64 old = dl_rq->running_bw;
 
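These hunks (together with the removal in update_curr_dl() further down) move the cpufreq kick to the points where DEADLINE utilization actually changes: running_bw moves on enqueue/dequeue-like events, not on every tick, so this is where the governor needs to be poked. For reference, the consumer side of cpufreq_update_util() is a per-CPU callback registered by a governor; a rough sketch, assuming the mainline cpufreq_add_update_util_hook() registration API (everything except that API and SCHED_CPUFREQ_DL is illustrative):

    /*
     * Governor-side sketch; the callback name and body are made up,
     * only the hook API and the SCHED_CPUFREQ_DL flag are mainline.
     */
    static void example_gov_update(struct update_util_data *data,
                                   u64 time, unsigned int flags)
    {
            if (flags & SCHED_CPUFREQ_DL) {
                    /* DEADLINE running_bw changed: re-evaluate the frequency. */
            }
    }

    /* Registered per CPU when the governor starts: */
    cpufreq_add_update_util_hook(cpu, &data, example_gov_update);
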
@@ -98,10 +100,12 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
        SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
        if (dl_rq->running_bw > old)
                dl_rq->running_bw = 0;
+       /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_util(rq_of_dl_rq(dl_rq), SCHED_CPUFREQ_DL);
 }
 
 static inline
-void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
        u64 old = dl_rq->this_bw;
 
@@ -111,7 +115,7 @@ void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 }
 
 static inline
-void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
+void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
        u64 old = dl_rq->this_bw;
 
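Some arithmetic context for these counters: each entity contributes dl_bw = to_ratio(period, runtime), a fixed-point utilization with BW_SHIFT (20) fractional bits, and this_bw/running_bw are sums of such contributions. A minimal, runnable userspace illustration; the constants mirror the mainline definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the kernel's to_ratio(): utilization in 20-bit fixed point. */
    #define BW_SHIFT 20

    static uint64_t to_ratio(uint64_t period, uint64_t runtime)
    {
            return (runtime << BW_SHIFT) / period;
    }

    int main(void)
    {
            /* 10 ms every 100 ms -> utilization 0.1 of one CPU */
            uint64_t bw = to_ratio(100000000ULL, 10000000ULL);

            printf("dl_bw = %llu (~%.3f)\n",
                   (unsigned long long)bw, (double)bw / (1 << BW_SHIFT));
            return 0;
    }
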
@@ -123,16 +127,46 @@ void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
        SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
 }
 
+static inline
+void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+       if (!dl_entity_is_special(dl_se))
+               __add_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+       if (!dl_entity_is_special(dl_se))
+               __sub_rq_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+       if (!dl_entity_is_special(dl_se))
+               __add_running_bw(dl_se->dl_bw, dl_rq);
+}
+
+static inline
+void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+       if (!dl_entity_is_special(dl_se))
+               __sub_running_bw(dl_se->dl_bw, dl_rq);
+}
+
 void dl_change_utilization(struct task_struct *p, u64 new_bw)
 {
        struct rq *rq;
 
+       BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
+
        if (task_on_rq_queued(p))
                return;
 
        rq = task_rq(p);
        if (p->dl.dl_non_contending) {
-               sub_running_bw(p->dl.dl_bw, &rq->dl);
+               sub_running_bw(&p->dl, &rq->dl);
                p->dl.dl_non_contending = 0;
                /*
                 * If the timer handler is currently running and the
@@ -144,8 +178,8 @@ void dl_change_utilization(struct task_struct *p, u64 new_bw)
                if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
                        put_task_struct(p);
        }
-       sub_rq_bw(p->dl.dl_bw, &rq->dl);
-       add_rq_bw(new_bw, &rq->dl);
+       __sub_rq_bw(p->dl.dl_bw, &rq->dl);
+       __add_rq_bw(new_bw, &rq->dl);
 }
 
 /*
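The new add_*/sub_* wrappers skip accounting for "special" entities: the SCHED_FLAG_SUGOV kthreads that the schedutil cpufreq governor runs as SCHED_DEADLINE, which must not be tracked as reserved bandwidth. dl_change_utilization() instead keeps the raw __-prefixed calls and asserts via BUG_ON() that it never sees such a task. dl_entity_is_special() is not defined in this file; in the same series it lives in kernel/sched/sched.h, roughly:

    static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
    {
    #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
            return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
    #else
            return false;
    #endif
    }
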
@@ -217,6 +251,9 @@ static void task_non_contending(struct task_struct *p)
        if (dl_se->dl_runtime == 0)
                return;
 
+       if (dl_entity_is_special(dl_se))
+               return;
+
        WARN_ON(hrtimer_active(&dl_se->inactive_timer));
        WARN_ON(dl_se->dl_non_contending);
 
@@ -236,12 +273,12 @@ static void task_non_contending(struct task_struct *p)
         */
        if (zerolag_time < 0) {
                if (dl_task(p))
-                       sub_running_bw(dl_se->dl_bw, dl_rq);
+                       sub_running_bw(dl_se, dl_rq);
                if (!dl_task(p) || p->state == TASK_DEAD) {
                        struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
                        if (p->state == TASK_DEAD)
-                               sub_rq_bw(p->dl.dl_bw, &rq->dl);
+                               sub_rq_bw(&p->dl, &rq->dl);
                        raw_spin_lock(&dl_b->lock);
                        __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
                        __dl_clear_params(p);
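Context for the zerolag_time < 0 branch above: task_non_contending() normally arms the "inactive timer" to fire at the task's zero-lag time, the instant from which, under GRUB, a blocked task's bandwidth no longer counts; if that instant has already passed, the bandwidth is released immediately instead. The computation just above this hunk looks roughly like this, with an illustrative worked example in the comment:

    /*
     * Sketch of the zero-lag computation preceding this hunk. Worked
     * example (made-up numbers): 2 ms of runtime left on a
     * 10 ms / 100 ms reservation gives runtime / dl_bw =
     * 2 ms * 100 / 10 = 20 ms, so the bandwidth may be released 20 ms
     * before the deadline; if the deadline is closer than that,
     * zerolag_time goes negative and we release it right away.
     */
    zerolag_time = dl_se->deadline -
             div64_long((dl_se->runtime * dl_se->dl_period),
                        dl_se->dl_runtime);
    zerolag_time -= rq_clock(rq);
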
@@ -268,7 +305,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
                return;
 
        if (flags & ENQUEUE_MIGRATED)
-               add_rq_bw(dl_se->dl_bw, dl_rq);
+               add_rq_bw(dl_se, dl_rq);
 
        if (dl_se->dl_non_contending) {
                dl_se->dl_non_contending = 0;
@@ -289,7 +326,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
                 * when the "inactive timer" fired).
                 * So, add it back.
                 */
-               add_running_bw(dl_se->dl_bw, dl_rq);
+               add_running_bw(dl_se, dl_rq);
        }
 }
 
@@ -1114,7 +1151,8 @@ static void update_curr_dl(struct rq *rq)
 {
        struct task_struct *curr = rq->curr;
        struct sched_dl_entity *dl_se = &curr->dl;
-       u64 delta_exec;
+       u64 delta_exec, scaled_delta_exec;
+       int cpu = cpu_of(rq);
 
        if (!dl_task(curr) || !on_dl_rq(dl_se))
                return;
@@ -1134,9 +1172,6 @@ static void update_curr_dl(struct rq *rq)
                return;
        }
 
-       /* kick cpufreq (see the comment in kernel/sched/sched.h). */
-       cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
-
        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -1148,13 +1183,39 @@ static void update_curr_dl(struct rq *rq)
 
        sched_rt_avg_update(rq, delta_exec);
 
-       if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
-               delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
-       dl_se->runtime -= delta_exec;
+       if (dl_entity_is_special(dl_se))
+               return;
+
+       /*
+        * For tasks that participate in GRUB, we implement GRUB-PA: the
+        * spare reclaimed bandwidth is used to clock down frequency.
+        *
+        * For the others, we still need to scale reservation parameters
+        * according to current frequency and CPU maximum capacity.
+        */
+       if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
+               scaled_delta_exec = grub_reclaim(delta_exec,
+                                                rq,
+                                                &curr->dl);
+       } else {
+               unsigned long scale_freq = arch_scale_freq_capacity(cpu);
+               unsigned long scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
+
+               scaled_delta_exec = cap_scale(delta_exec, scale_freq);
+               scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
+       }
+
+       dl_se->runtime -= scaled_delta_exec;
 
 throttle:
        if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
                dl_se->dl_throttled = 1;
+
+               /* If requested, inform the user about runtime overruns. */
+               if (dl_runtime_exceeded(dl_se) &&
+                   (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
+                       dl_se->dl_overrun = 1;
+
                __dequeue_task_dl(rq, curr, 0);
                if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
                        enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
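The new non-reclaiming branch charges runtime in "maximum capacity" units: time spent at a lower frequency, or on a lower-capacity core, performs proportionally less work and so consumes proportionally less of the reserved runtime (the SCHED_FLAG_RECLAIM branch gets the same effect through grub_reclaim(), which is what lets GRUB-PA clock the frequency down). cap_scale() multiplies by a capacity in [0, 1024] and shifts right by SCHED_CAPACITY_SHIFT (10); a runnable illustration with made-up capacity values:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the kernel's cap_scale(): scale v by s/1024. */
    #define SCHED_CAPACITY_SHIFT 10
    #define cap_scale(v, s) ((v) * (s) >> SCHED_CAPACITY_SHIFT)

    int main(void)
    {
            uint64_t delta_exec = 1000000;  /* 1 ms of wall-clock time */
            unsigned long scale_freq = 512; /* CPU at half its max frequency */
            unsigned long scale_cpu = 1024; /* full-capacity core */

            uint64_t scaled = cap_scale(delta_exec, scale_freq);
            scaled = cap_scale(scaled, scale_cpu);

            /* 1 ms at half frequency consumes only 0.5 ms of reserved
             * runtime: the reservation is expressed at max capacity. */
            printf("scaled_delta_exec = %llu ns\n", (unsigned long long)scaled);
            return 0;
    }
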
@@ -1204,8 +1265,8 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
                struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
                if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
-                       sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
-                       sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
+                       sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
+                       sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
                        dl_se->dl_non_contending = 0;
                }
 
@@ -1222,7 +1283,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
        sched_clock_tick();
        update_rq_clock(rq);
 
-       sub_running_bw(dl_se->dl_bw, &rq->dl);
+       sub_running_bw(dl_se, &rq->dl);
        dl_se->dl_non_contending = 0;
 unlock:
        task_rq_unlock(rq, p, &rf);
@@ -1416,8 +1477,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
                dl_check_constrained_dl(&p->dl);
 
        if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
-               add_rq_bw(p->dl.dl_bw, &rq->dl);
-               add_running_bw(p->dl.dl_bw, &rq->dl);
+               add_rq_bw(&p->dl, &rq->dl);
+               add_running_bw(&p->dl, &rq->dl);
        }
 
        /*
@@ -1457,8 +1518,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
        __dequeue_task_dl(rq, p, flags);
 
        if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
-               sub_running_bw(p->dl.dl_bw, &rq->dl);
-               sub_rq_bw(p->dl.dl_bw, &rq->dl);
+               sub_running_bw(&p->dl, &rq->dl);
+               sub_rq_bw(&p->dl, &rq->dl);
        }
 
        /*
@@ -1564,7 +1625,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
         */
        raw_spin_lock(&rq->lock);
        if (p->dl.dl_non_contending) {
-               sub_running_bw(p->dl.dl_bw, &rq->dl);
+               sub_running_bw(&p->dl, &rq->dl);
                p->dl.dl_non_contending = 0;
                /*
                 * If the timer handler is currently running and the
@@ -1576,7 +1637,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
                if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
                        put_task_struct(p);
        }
-       sub_rq_bw(p->dl.dl_bw, &rq->dl);
+       sub_rq_bw(&p->dl, &rq->dl);
        raw_spin_unlock(&rq->lock);
 }
 
@@ -2019,11 +2080,11 @@ retry:
        }
 
        deactivate_task(rq, next_task, 0);
-       sub_running_bw(next_task->dl.dl_bw, &rq->dl);
-       sub_rq_bw(next_task->dl.dl_bw, &rq->dl);
+       sub_running_bw(&next_task->dl, &rq->dl);
+       sub_rq_bw(&next_task->dl, &rq->dl);
        set_task_cpu(next_task, later_rq->cpu);
-       add_rq_bw(next_task->dl.dl_bw, &later_rq->dl);
-       add_running_bw(next_task->dl.dl_bw, &later_rq->dl);
+       add_rq_bw(&next_task->dl, &later_rq->dl);
+       add_running_bw(&next_task->dl, &later_rq->dl);
        activate_task(later_rq, next_task, 0);
        ret = 1;
 
@@ -2111,11 +2172,11 @@ static void pull_dl_task(struct rq *this_rq)
                        resched = true;
 
                        deactivate_task(src_rq, p, 0);
-                       sub_running_bw(p->dl.dl_bw, &src_rq->dl);
-                       sub_rq_bw(p->dl.dl_bw, &src_rq->dl);
+                       sub_running_bw(&p->dl, &src_rq->dl);
+                       sub_rq_bw(&p->dl, &src_rq->dl);
                        set_task_cpu(p, this_cpu);
-                       add_rq_bw(p->dl.dl_bw, &this_rq->dl);
-                       add_running_bw(p->dl.dl_bw, &this_rq->dl);
+                       add_rq_bw(&p->dl, &this_rq->dl);
+                       add_running_bw(&p->dl, &this_rq->dl);
                        activate_task(this_rq, p, 0);
                        dmin = p->dl.deadline;
 
@@ -2224,7 +2285,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
                task_non_contending(p);
 
        if (!task_on_rq_queued(p))
-               sub_rq_bw(p->dl.dl_bw, &rq->dl);
+               sub_rq_bw(&p->dl, &rq->dl);
 
        /*
         * We cannot use inactive_task_timer() to invoke sub_running_bw()
@@ -2256,7 +2317,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
        /* If p is not queued we will update its parameters at next wakeup. */
        if (!task_on_rq_queued(p)) {
-               add_rq_bw(p->dl.dl_bw, &rq->dl);
+               add_rq_bw(&p->dl, &rq->dl);
 
                return;
        }
@@ -2435,6 +2496,9 @@ int sched_dl_overflow(struct task_struct *p, int policy,
        u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
        int cpus, err = -1;
 
+       if (attr->sched_flags & SCHED_FLAG_SUGOV)
+               return 0;
+
        /* !deadline task may carry old deadline bandwidth */
        if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
                return 0;
@@ -2521,6 +2585,10 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
  */
 bool __checkparam_dl(const struct sched_attr *attr)
 {
+       /* special dl tasks don't actually use any parameter */
+       if (attr->sched_flags & SCHED_FLAG_SUGOV)
+               return true;
+
        /* deadline != 0 */
        if (attr->sched_deadline == 0)
                return false;
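These two hunks are the admission-control side of SCHED_FLAG_SUGOV: the governor's kthreads carry no real reservation, so they bypass both the bandwidth check in sched_dl_overflow() and the parameter validation in __checkparam_dl(). For reference, schedutil in the same series sets its worker up with all-zero parameters, roughly as below (a sketch; see kernel/sched/cpufreq_schedutil.c):

    struct sched_attr attr = {
            .size           = sizeof(struct sched_attr),
            .sched_policy   = SCHED_DEADLINE,
            .sched_flags    = SCHED_FLAG_SUGOV,
            /* No actual reservation: special entities skip accounting. */
            .sched_runtime  = 0,
            .sched_deadline = 0,
            .sched_period   = 0,
    };

    ret = sched_setattr_nocheck(thread, &attr);
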
@@ -2566,6 +2634,7 @@ void __dl_clear_params(struct task_struct *p)
        dl_se->dl_throttled = 0;
        dl_se->dl_yielded = 0;
        dl_se->dl_non_contending = 0;
+       dl_se->dl_overrun = 0;
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
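
Finally, dl_overrun is the per-entity flag behind the new SCHED_FLAG_DL_OVERRUN: in the full series, setting it causes a SIGXCPU to be delivered to the task from the posix-cpu-timers tick path. From userspace (privileged, since this is SCHED_DEADLINE) the feature looks roughly like the program below; there is no glibc wrapper for sched_setattr(), so the ABI struct and flag value are declared by hand:

    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct sched_attr {                     /* the sched_setattr() ABI */
            uint32_t size;
            uint32_t sched_policy;
            uint64_t sched_flags;
            int32_t  sched_nice;
            uint32_t sched_priority;
            uint64_t sched_runtime;
            uint64_t sched_deadline;
            uint64_t sched_period;
    };

    #define SCHED_DEADLINE          6
    #define SCHED_FLAG_DL_OVERRUN   0x04

    static void on_overrun(int sig)
    {
            static const char msg[] = "runtime overrun\n";

            (void)sig;
            write(STDERR_FILENO, msg, sizeof(msg) - 1);
    }

    int main(void)
    {
            struct sched_attr attr = {
                    .size           = sizeof(attr),
                    .sched_policy   = SCHED_DEADLINE,
                    .sched_flags    = SCHED_FLAG_DL_OVERRUN,
                    .sched_runtime  =  10 * 1000 * 1000,    /*  10 ms */
                    .sched_deadline = 100 * 1000 * 1000,    /* 100 ms */
                    .sched_period   = 100 * 1000 * 1000,    /* 100 ms */
            };

            signal(SIGXCPU, on_overrun);
            if (syscall(SYS_sched_setattr, 0, &attr, 0)) {
                    perror("sched_setattr");
                    return 1;
            }
            for (;;)
                    ;       /* burn CPU until throttled -> SIGXCPU */
    }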