Merge branch 'fortglx/3.14/time' of git://git.linaro.org/people/john.stultz/linux...
author     Ingo Molnar <mingo@kernel.org>  Sun, 12 Jan 2014 13:13:31 +0000 (14:13 +0100)
committer  Ingo Molnar <mingo@kernel.org>  Sun, 12 Jan 2014 13:13:31 +0000 (14:13 +0100)
Pull timekeeping updates from John Stultz.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/time/tick-common.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c

diff --combined kernel/time/tick-common.c
index 162b03ab0ad2ed16e3654f7006aae6f88c1886cf,91c5f27e82a35191f33f45a08ef57a7d83d1d3db..20b2fe37d1053a21b138dbb2a7856e574c2ea9aa
@@@ -33,21 -33,6 +33,21 @@@ DEFINE_PER_CPU(struct tick_device, tick
   */
  ktime_t tick_next_period;
  ktime_t tick_period;
 +
 +/*
 + * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 + * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 + * variable has two functions:
 + *
 + * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 + *    timekeeping lock all at once. Only the CPU which is assigned to do the
 + *    update is handling it.
 + *
 + * 2) Hand off the duty in the NOHZ idle case by setting the value to
 + *    TICK_DO_TIMER_NONE, i.e. a non-existing CPU. So the next CPU which looks
 + *    at it will take over and keep the timekeeping alive.  The handover
 + *    procedure also covers CPU hotplug.
 + */
  int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
  
  /*
@@@ -85,6 -70,7 +85,7 @@@ static void tick_periodic(int cpu
  
                do_timer(1);
                write_sequnlock(&jiffies_lock);
+               update_wall_time();
        }
  
        update_process_times(user_mode(get_irq_regs()));
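
For reference, a minimal sketch of the takeover path that the comment block added
above describes, abridged from tick_sched_do_timer() in kernel/time/tick-sched.c
(the symbols are the real kernel ones, but the body is simplified and is not part
of this merge's changes):

static void tick_sched_do_timer(ktime_t now)
{
	int cpu = smp_processor_id();

#ifdef CONFIG_NO_HZ_COMMON
	/*
	 * The owning CPU dropped the duty on NOHZ idle entry by writing
	 * TICK_DO_TIMER_NONE; the first CPU that notices adopts it.
	 */
	if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
		tick_do_timer_cpu = cpu;
#endif

	/* Only the owning CPU advances jiffies (and, after this merge, the wall time). */
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);
}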
diff --combined kernel/time/tick-sched.c
index 52cee12b3302d92817fedb0c1a245bc2bc91a549,c58b03d89951fe72b923b7713b644fe2b0bb81d9..0ddd020bbaf2af54b7d4c5cc8594361c71853d8d
@@@ -86,6 -86,7 +86,7 @@@ static void tick_do_update_jiffies64(kt
                tick_next_period = ktime_add(last_jiffies_update, tick_period);
        }
        write_sequnlock(&jiffies_lock);
+       update_wall_time();
  }
  
  /*
@@@ -361,8 -362,8 +362,8 @@@ void __init tick_nohz_init(void
  /*
   * NO HZ enabled ?
   */
 -int tick_nohz_enabled __read_mostly  = 1;
 -
 +static int tick_nohz_enabled __read_mostly  = 1;
 +int tick_nohz_active  __read_mostly;
  /*
   * Enable / Disable tickless mode
   */
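
For context on the split above: tick_nohz_enabled keeps reflecting whether NOHZ is
configured or allowed at all (it becomes static and is still set from the "nohz="
boot parameter), while the new tick_nohz_active flag only becomes set once a CPU
has actually switched into a NOHZ mode, as the tick_nohz_switch_to_nohz() and
tick_setup_sched_timer() hunks further down show. A short summary sketch, drawn
from the hunks in this diff rather than from the changelog:

/*
 * tick_nohz_enabled - NOHZ may be used (config / "nohz=" boot option);
 *                     now static to kernel/time/tick-sched.c.
 * tick_nohz_active  - a CPU has really switched to NOHZ_MODE_LOWRES or
 *                     NOHZ_MODE_HIGHRES; get_cpu_idle_time_us() and
 *                     get_cpu_iowait_time_us() now test this flag instead.
 */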
@@@ -461,7 -462,7 +462,7 @@@ u64 get_cpu_idle_time_us(int cpu, u64 *
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
        ktime_t now, idle;
  
 -      if (!tick_nohz_enabled)
 +      if (!tick_nohz_active)
                return -1;
  
        now = ktime_get();
@@@ -502,7 -503,7 +503,7 @@@ u64 get_cpu_iowait_time_us(int cpu, u6
        struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
        ktime_t now, iowait;
  
 -      if (!tick_nohz_enabled)
 +      if (!tick_nohz_active)
                return -1;
  
        now = ktime_get();
@@@ -707,10 -708,8 +708,10 @@@ static bool can_stop_idle_tick(int cpu
                return false;
        }
  
 -      if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 +      if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
 +              ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ };
                return false;
 +      }
  
        if (need_resched())
                return false;
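
A note on the new sleep_length assignment a few lines up: while the mode is still
NOHZ_MODE_INACTIVE the tick is not going to be stopped, so tick_nohz_get_sleep_length()
should report one tick period rather than whatever was left in the field (presumably
so cpuidle governors get a sane estimate). NSEC_PER_SEC/HZ is exactly that period:

	/* One tick in nanoseconds, the value stored in ts->sleep_length above: */
	/*   HZ = 100  -> 1000000000 / 100  = 10000000 ns = 10 ms */
	/*   HZ = 1000 -> 1000000000 / 1000 =  1000000 ns =  1 ms */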
@@@ -797,6 -796,11 +798,6 @@@ void tick_nohz_idle_enter(void
        local_irq_disable();
  
        ts = &__get_cpu_var(tick_cpu_sched);
 -      /*
 -       * set ts->inidle unconditionally. even if the system did not
 -       * switch to nohz mode the cpu frequency governers rely on the
 -       * update of the idle time accounting in tick_nohz_start_idle().
 -       */
        ts->inidle = 1;
        __tick_nohz_idle_enter(ts);
  
@@@ -965,7 -969,7 +966,7 @@@ static void tick_nohz_switch_to_nohz(vo
        struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
        ktime_t next;
  
 -      if (!tick_nohz_enabled)
 +      if (!tick_nohz_active)
                return;
  
        local_irq_disable();
                local_irq_enable();
                return;
        }
 -
 +      tick_nohz_active = 1;
        ts->nohz_mode = NOHZ_MODE_LOWRES;
  
        /*
@@@ -1129,10 -1133,8 +1130,10 @@@ void tick_setup_sched_timer(void
        }
  
  #ifdef CONFIG_NO_HZ_COMMON
 -      if (tick_nohz_enabled)
 +      if (tick_nohz_enabled) {
                ts->nohz_mode = NOHZ_MODE_HIGHRES;
 +              tick_nohz_active = 1;
 +      }
  #endif
  }
  #endif /* HIGH_RES_TIMERS */
diff --combined kernel/time/timekeeping.c
index 87b4f00284c9e21f420d0865af937b03eb286f33,abfa4e86ac54eae74bc7d7714677df3a6e6244d6..0aa4ce81bc168e2432f8f76e1e62c0b41512d47b
@@@ -77,7 -77,7 +77,7 @@@ static void tk_set_wall_to_mono(struct 
        tk->wall_to_monotonic = wtm;
        set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
        tk->offs_real = timespec_to_ktime(tmp);
-       tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
+       tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
  }
  
  static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
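
The ktime_sub() to ktime_add() change above (repeated in __timekeeping_set_tai_offset()
below) fixes the sign of the TAI offset. The relationship it restores, written out as an
editorial derivation rather than text from the patch:

/*
 *   CLOCK_REALTIME = CLOCK_MONOTONIC + offs_real
 *   CLOCK_TAI      = CLOCK_REALTIME  + tai_offset        (TAI runs ahead of UTC)
 *                  = CLOCK_MONOTONIC + offs_real + tai_offset
 *
 * hence offs_tai = offs_real + tai_offset; subtracting the offset made
 * CLOCK_TAI trail UTC instead of leading it.
 */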
@@@ -90,8 -90,9 +90,9 @@@
  }
  
  /**
-  * timekeeper_setup_internals - Set up internals to use clocksource clock.
+  * tk_setup_internals - Set up internals to use clocksource clock.
   *
+  * @tk:               The target timekeeper to setup.
   * @clock:            Pointer to clocksource.
   *
   * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
@@@ -595,7 -596,7 +596,7 @@@ s32 timekeeping_get_tai_offset(void
  static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
  {
        tk->tai_offset = tai_offset;
-       tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
+       tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
  }
  
  /**
@@@ -610,6 -611,7 +611,7 @@@ void timekeeping_set_tai_offset(s32 tai
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        write_seqcount_begin(&timekeeper_seq);
        __timekeeping_set_tai_offset(tk, tai_offset);
+       timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
        write_seqcount_end(&timekeeper_seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
        clock_was_set();
@@@ -1023,6 -1025,8 +1025,8 @@@ static int timekeeping_suspend(void
                timekeeping_suspend_time =
                        timespec_add(timekeeping_suspend_time, delta_delta);
        }
+       timekeeping_update(tk, TK_MIRROR);
        write_seqcount_end(&timekeeper_seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  
@@@ -1130,16 -1134,6 +1134,6 @@@ static void timekeeping_adjust(struct t
                 * we can adjust by 1.
                 */
                error >>= 2;
-               /*
-                * XXX - In update_wall_time, we round up to the next
-                * nanosecond, and store the amount rounded up into
-                * the error. This causes the likely below to be unlikely.
-                *
-                * The proper fix is to avoid rounding up by using
-                * the high precision tk->xtime_nsec instead of
-                * xtime.tv_nsec everywhere. Fixing this will take some
-                * time.
-                */
                if (likely(error <= interval))
                        adj = 1;
                else
@@@ -1255,7 -1249,7 +1249,7 @@@ out_adjust
  static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  {
        u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
-       unsigned int action = 0;
+       unsigned int clock_set = 0;
  
        while (tk->xtime_nsec >= nsecps) {
                int leap;
  
                        __timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
  
-                       clock_was_set_delayed();
-                       action = TK_CLOCK_WAS_SET;
+                       clock_set = TK_CLOCK_WAS_SET;
                }
        }
-       return action;
+       return clock_set;
  }
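
accumulate_nsecs_to_secs() now reports TK_CLOCK_WAS_SET to its callers instead of
calling clock_was_set_delayed() itself, so the notification is raised only after the
timekeeper locks are dropped. A condensed sketch of the flow inside update_wall_time()
as the later hunks shape it (symbols from this diff, heavily abridged):

	unsigned int clock_set = 0;

	/* under timekeeper_lock / timekeeper_seq: */
	offset = logarithmic_accumulation(tk, offset, shift, &clock_set);
	clock_set |= accumulate_nsecs_to_secs(tk);
	timekeeping_update(real_tk, clock_set);

	/* after the locks are dropped: */
	if (clock_set)
		clock_was_set();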
  
  /**
   * Returns the unconsumed cycles.
   */
  static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
-                                               u32 shift)
+                                               u32 shift,
+                                               unsigned int *clock_set)
  {
        cycle_t interval = tk->cycle_interval << shift;
        u64 raw_nsecs;
        tk->cycle_last += interval;
  
        tk->xtime_nsec += tk->xtime_interval << shift;
-       accumulate_nsecs_to_secs(tk);
+       *clock_set |= accumulate_nsecs_to_secs(tk);
  
        /* Accumulate raw time */
        raw_nsecs = (u64)tk->raw_interval << shift;
@@@ -1347,7 -1341,7 +1341,7 @@@ static inline void old_vsyscall_fixup(s
        tk->xtime_nsec -= remainder;
        tk->xtime_nsec += 1ULL << tk->shift;
        tk->ntp_error += remainder << tk->ntp_error_shift;
 -
 +      tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;
  }
  #else
  #define old_vsyscall_fixup(tk)
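
The added ntp_error line above completes the error bookkeeping for the round-up that
old_vsyscall_fixup() performs on xtime_nsec. Written out, as an editorial derivation
using the hunk's own variable names:

/*
 * The fixup rounds xtime_nsec up to the next full (shifted) nanosecond:
 *
 *   xtime_nsec -= remainder;
 *   xtime_nsec += 1ULL << shift;
 *
 * i.e. it credits the clock with ((1 << shift) - remainder) it has not
 * actually accumulated.  The error term now reflects both steps:
 *
 *   ntp_error += remainder       << ntp_error_shift;   (pre-existing)
 *   ntp_error -= (1ULL << shift) << ntp_error_shift;   (added here)
 *
 * so timekeeping_adjust() can steer out what the rounding added, instead of
 * seeing only the subtraction and pushing the clock further ahead under
 * CONFIG_GENERIC_TIME_VSYSCALL_OLD.
 */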
   * update_wall_time - Uses the current clocksource to increment the wall time
   *
   */
- static void update_wall_time(void)
+ void update_wall_time(void)
  {
        struct clocksource *clock;
        struct timekeeper *real_tk = &timekeeper;
        struct timekeeper *tk = &shadow_timekeeper;
        cycle_t offset;
        int shift = 0, maxshift;
-       unsigned int action;
+       unsigned int clock_set = 0;
        unsigned long flags;
  
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
        shift = min(shift, maxshift);
        while (offset >= tk->cycle_interval) {
-               offset = logarithmic_accumulation(tk, offset, shift);
+               offset = logarithmic_accumulation(tk, offset, shift,
+                                                       &clock_set);
                if (offset < tk->cycle_interval<<shift)
                        shift--;
        }
         * Finally, make sure that after the rounding
         * xtime_nsec isn't larger than NSEC_PER_SEC
         */
-       action = accumulate_nsecs_to_secs(tk);
+       clock_set |= accumulate_nsecs_to_secs(tk);
  
        write_seqcount_begin(&timekeeper_seq);
        /* Update clock->cycle_last with the new value */
         * updating.
         */
        memcpy(real_tk, tk, sizeof(*tk));
-       timekeeping_update(real_tk, action);
+       timekeeping_update(real_tk, clock_set);
        write_seqcount_end(&timekeeper_seq);
  out:
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+       if (clock_set)
+               clock_was_set();
  }
  
  /**
@@@ -1583,7 -1580,6 +1580,6 @@@ struct timespec get_monotonic_coarse(vo
  void do_timer(unsigned long ticks)
  {
        jiffies_64 += ticks;
-       update_wall_time();
        calc_global_load(ticks);
  }
  
@@@ -1698,12 -1694,14 +1694,14 @@@ int do_adjtimex(struct timex *txc
  
        if (tai != orig_tai) {
                __timekeeping_set_tai_offset(tk, tai);
-               update_pvclock_gtod(tk, true);
-               clock_was_set_delayed();
+               timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
        }
        write_seqcount_end(&timekeeper_seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
  
+       if (tai != orig_tai)
+               clock_was_set();
        ntp_notify_cmos_timer();
  
        return ret;
@@@ -1739,4 -1737,5 +1737,5 @@@ void xtime_update(unsigned long ticks
        write_seqlock(&jiffies_lock);
        do_timer(ticks);
        write_sequnlock(&jiffies_lock);
+       update_wall_time();
  }
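
Taken together with the tick_periodic(), tick_do_update_jiffies64() and do_timer()
hunks above, the merge ends up calling update_wall_time() from the tick code after
jiffies_lock has been dropped instead of from inside do_timer(). A condensed sketch
of the resulting convention on the xtime_update() path (taken from the hunks in this
diff, with comments added):

	write_seqlock(&jiffies_lock);
	do_timer(ticks);		/* jiffies_64 += ticks; calc_global_load() */
	write_sequnlock(&jiffies_lock);
	update_wall_time();		/* takes timekeeper_lock / timekeeper_seq,
					 * no longer nested inside jiffies_lock */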