Merge tag 'v4.0-rc6' into timers/core, before applying new patches

author Ingo Molnar <mingo@kernel.org>
Tue, 31 Mar 2015 07:08:13 +0000 (09:08 +0200)
committer Ingo Molnar <mingo@kernel.org>
Tue, 31 Mar 2015 07:08:13 +0000 (09:08 +0200)
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c

index 6eaddc47c43dfbd60f52b6d49c425acbd55cab62..d4f970a4d255acebedef4bd39e6b2121651db8e1 100644 (file)
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -152,7 +152,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
         unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
         struct completion inbound_alive;
         struct tick_device *tdev;
-       enum clock_event_mode tdev_mode;
+       enum clock_event_state tdev_state;
         long volatile *handshake_ptr;
         int ipi_nr, ret;
  
@@ -223,8 +223,8 @@ static int bL_switch_to(unsigned int new_cluster_id)
         if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
                 tdev = NULL;
         if (tdev) {
-               tdev_mode = tdev->evtdev->mode;
-               clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+               tdev_state = tdev->evtdev->state;
+               clockevents_set_state(tdev->evtdev, CLOCK_EVT_STATE_SHUTDOWN);
         }
  
         ret = cpu_pm_enter();
@@ -252,7 +252,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
         ret = cpu_pm_exit();
  
         if (tdev) {
-               clockevents_set_mode(tdev->evtdev, tdev_mode);
+               clockevents_set_state(tdev->evtdev, tdev_state);
                 clockevents_program_event(tdev->evtdev,
                                           tdev->evtdev->next_event, 1);
         }
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c

index 61b4d705c26720eb88f2dbcf7eab72361d31d14d..43cf74561cfd02fa6b350f8a3b41410b9407f8a6 100644 (file)
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
  
         /*
          * 120000 rough estimate from the calculations in
-        * __clocksource_updatefreq_scale.
+        * __clocksource_update_freq_scale.
          */
         clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
                         32768, NSEC_PER_SEC, 120000);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c

index 32aeea083d93b2391122ad9d1c49c3925121e38b..ec37ab3f524f303419d2cc3a82b79c119e61de1d 100644 (file)
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
  void update_vsyscall(struct timekeeper *tk)
  {
         struct timespec xtime_coarse;
-       u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+       u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
  
         ++vdso_data->tb_seq_count;
         smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
         vdso_data->wtm_clock_nsec               = tk->wall_to_monotonic.tv_nsec;
  
         if (!use_syscall) {
-               vdso_data->cs_cycle_last        = tk->tkr.cycle_last;
+               vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                 vdso_data->xtime_clock_sec      = tk->xtime_sec;
-               vdso_data->xtime_clock_nsec     = tk->tkr.xtime_nsec;
-               vdso_data->cs_mult              = tk->tkr.mult;
-               vdso_data->cs_shift             = tk->tkr.shift;
+               vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
+               vdso_data->cs_mult              = tk->tkr_mono.mult;
+               vdso_data->cs_shift             = tk->tkr_mono.shift;
         }
  
         smp_wmb();
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c

index 20660dddb2d67f1e4ebdd16a2a89ff99602935f1..170ddd2018b31667df8619b471df42b7fb562705 100644 (file)
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
  {
         u64 nsecps;
  
-       if (tk->tkr.clock != &clocksource_tod)
+       if (tk->tkr_mono.clock != &clocksource_tod)
                 return;
  
         /* Make userspace gettimeofday spin until we're done. */
         ++vdso_data->tb_update_count;
         smp_wmb();
-       vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+       vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
         vdso_data->xtime_clock_sec = tk->xtime_sec;
-       vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+       vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
         vdso_data->wtom_clock_sec =
                 tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-       vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-       nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+       vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+       nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
         while (vdso_data->wtom_clock_nsec >= nsecps) {
                 vdso_data->wtom_clock_nsec -= nsecps;
                 vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
  
         vdso_data->xtime_coarse_sec = tk->xtime_sec;
         vdso_data->xtime_coarse_nsec =
-               (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+               (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
         vdso_data->wtom_coarse_sec =
                 vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
         vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
                 vdso_data->wtom_coarse_sec++;
         }
  
-       vdso_data->tk_mult = tk->tkr.mult;
-       vdso_data->tk_shift = tk->tkr.shift;
+       vdso_data->tk_mult = tk->tkr_mono.mult;
+       vdso_data->tk_shift = tk->tkr_mono.shift;
         smp_wmb();
         ++vdso_data->tb_update_count;
  }
@@ -283,7 +283,7 @@ void __init time_init(void)
         if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
                 panic("Couldn't request external interrupt 0x1406");
  
-       if (clocksource_register(&clocksource_tod) != 0)
+       if (__clocksource_register(&clocksource_tod) != 0)
                 panic("Could not register TOD clock source");
  
         /* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c

index 2f80d23a0a44964ebff804a2ab4428b9f30cb162..18147a5523d947736a3e7c0815a7eeb3526ec271 100644 (file)
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
         .rating = 100,
         .read   = timer_cs_read,
         .mask   = CLOCKSOURCE_MASK(64),
-       .shift  = 2,
         .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
  };
  
  static __init int setup_timer_cs(void)
  {
         timer_cs_enabled = 1;
-       timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-                                           timer_cs.shift);
-
-       return clocksource_register(&timer_cs);
+       return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
  }
  
  #ifdef CONFIG_SMP
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c

index d412b0856c0a2622b13b9bc84dc39f65c1b5b08c..00178ecf9aeab4731ffd24d86ff7bc28252e23b6 100644 (file)
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
  
  void update_vsyscall(struct timekeeper *tk)
  {
-       if (tk->tkr.clock != &cycle_counter_cs)
+       if (tk->tkr_mono.clock != &cycle_counter_cs)
                 return;
  
         write_seqcount_begin(&vdso_data->tb_seq);
  
-       vdso_data->cycle_last           = tk->tkr.cycle_last;
-       vdso_data->mask                 = tk->tkr.mask;
-       vdso_data->mult                 = tk->tkr.mult;
-       vdso_data->shift                = tk->tkr.shift;
+       vdso_data->cycle_last           = tk->tkr_mono.cycle_last;
+       vdso_data->mask                 = tk->tkr_mono.mask;
+       vdso_data->mult                 = tk->tkr_mono.mult;
+       vdso_data->shift                = tk->tkr_mono.shift;
  
         vdso_data->wall_time_sec        = tk->xtime_sec;
-       vdso_data->wall_time_snsec      = tk->tkr.xtime_nsec;
+       vdso_data->wall_time_snsec      = tk->tkr_mono.xtime_nsec;
  
         vdso_data->monotonic_time_sec   = tk->xtime_sec
                                         + tk->wall_to_monotonic.tv_sec;
-       vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+       vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
                                         + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
         while (vdso_data->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                 vdso_data->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                 vdso_data->monotonic_time_sec++;
         }
  
         vdso_data->wall_time_coarse_sec = tk->xtime_sec;
-       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
  
         vdso_data->monotonic_time_coarse_sec =
                 vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c

index c7d791f32b98e4f8598684306f086c6193ff6e93..51e3304169951619362ea4a1494716e4f20696bf 100644 (file)
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
         gtod_write_begin(vdata);
  
         /* copy vsyscall data */
-       vdata->vclock_mode      = tk->tkr.clock->archdata.vclock_mode;
-       vdata->cycle_last       = tk->tkr.cycle_last;
-       vdata->mask             = tk->tkr.mask;
-       vdata->mult             = tk->tkr.mult;
-       vdata->shift            = tk->tkr.shift;
+       vdata->vclock_mode      = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->cycle_last       = tk->tkr_mono.cycle_last;
+       vdata->mask             = tk->tkr_mono.mask;
+       vdata->mult             = tk->tkr_mono.mult;
+       vdata->shift            = tk->tkr_mono.shift;
  
         vdata->wall_time_sec            = tk->xtime_sec;
-       vdata->wall_time_snsec          = tk->tkr.xtime_nsec;
+       vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
  
         vdata->monotonic_time_sec       = tk->xtime_sec
                                         + tk->wall_to_monotonic.tv_sec;
-       vdata->monotonic_time_snsec     = tk->tkr.xtime_nsec
+       vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
                                         + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
         while (vdata->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                 vdata->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                 vdata->monotonic_time_sec++;
         }
  
         vdata->wall_time_coarse_sec     = tk->xtime_sec;
-       vdata->wall_time_coarse_nsec    = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdata->wall_time_coarse_nsec    = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
  
         vdata->monotonic_time_coarse_sec =
                 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 32bf19ef3115f65c9dffc23a655be2763babcaff..0ee725f1896d1134e50eaf12496e55005f615f7e 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
         struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
         u64 boot_ns;
  
-       boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+       boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
  
         write_seqcount_begin(&vdata->seq);
  
         /* copy pvclock gtod data */
-       vdata->clock.vclock_mode        = tk->tkr.clock->archdata.vclock_mode;
-       vdata->clock.cycle_last         = tk->tkr.cycle_last;
-       vdata->clock.mask               = tk->tkr.mask;
-       vdata->clock.mult               = tk->tkr.mult;
-       vdata->clock.shift              = tk->tkr.shift;
+       vdata->clock.vclock_mode        = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->clock.cycle_last         = tk->tkr_mono.cycle_last;
+       vdata->clock.mask               = tk->tkr_mono.mask;
+       vdata->clock.mult               = tk->tkr_mono.mult;
+       vdata->clock.shift              = tk->tkr_mono.shift;
  
         vdata->boot_ns                  = boot_ns;
-       vdata->nsec_base                = tk->tkr.xtime_nsec;
+       vdata->nsec_base                = tk->tkr_mono.xtime_nsec;
  
         write_seqcount_end(&vdata->seq);
  }
diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c

index d0a7bd66b8b91e0eb9aa4f7092ec42869a9067d8..dc3c6ee04aaa009e117fcb43fb065394ddfd6fae 100644 (file)
--- a/drivers/clocksource/em_sti.c
+++ b/drivers/clocksource/em_sti.c
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
  
         ret = em_sti_start(p, USER_CLOCKSOURCE);
         if (!ret)
-               __clocksource_updatefreq_hz(cs, p->rate);
+               __clocksource_update_freq_hz(cs, p->rate);
         return ret;
  }
  
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c

index 2bd13b53b727635500051746e27299c89d8d9b60..b8ff3c64cc452a16fc4108426fb6e5b1c54e91e8 100644 (file)
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
  
         ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
         if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                 ch->cs_enabled = true;
         }
         return ret;
diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c

index f150ca82bfaf106a7ef2c5a40dd12a1e098e39f0..b6b8fa3cd211fc6b03460f678168d0f6568362f0 100644 (file)
--- a/drivers/clocksource/sh_tmu.c
+++ b/drivers/clocksource/sh_tmu.c
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
  
         ret = sh_tmu_enable(ch);
         if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                 ch->cs_enabled = true;
         }
  
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h

index 2e4cb67f6e560094aa719fe75f595dfbb562cf8e..e20232c3320ab29b64c888ee474a9bc1aec3da66 100644 (file)
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -32,7 +32,7 @@ enum clock_event_nofitiers {
  struct clock_event_device;
  struct module;
  
-/* Clock event mode commands */
+/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
  enum clock_event_mode {
         CLOCK_EVT_MODE_UNUSED = 0,
         CLOCK_EVT_MODE_SHUTDOWN,
@@ -41,6 +41,24 @@ enum clock_event_mode {
         CLOCK_EVT_MODE_RESUME,
  };
  
+/*
+ * Possible states of a clock event device.
+ *
+ * DETACHED:   Device is not used by clockevents core. Initial state or can be
+ *             reached from SHUTDOWN.
+ * SHUTDOWN:   Device is powered-off. Can be reached from PERIODIC or ONESHOT.
+ * PERIODIC:   Device is programmed to generate events periodically. Can be
+ *             reached from DETACHED or SHUTDOWN.
+ * ONESHOT:    Device is programmed to generate an event only once. Can be
+ *             reached from DETACHED or SHUTDOWN.
+ */
+enum clock_event_state {
+       CLOCK_EVT_STATE_DETACHED = 0,
+       CLOCK_EVT_STATE_SHUTDOWN,
+       CLOCK_EVT_STATE_PERIODIC,
+       CLOCK_EVT_STATE_ONESHOT,
+};
+
  /*
   * Clock event features
   */
@@ -78,10 +96,15 @@ enum clock_event_mode {
   * @min_delta_ns:      minimum delta value in ns
   * @mult:              nanosecond to cycles multiplier
   * @shift:             nanoseconds to cycles divisor (power of two)
- * @mode:              operating mode assigned by the management code
+ * @mode:              operating mode, relevant only to ->set_mode(), OBSOLETE
+ * @state:             current state of the device, assigned by the core code
   * @features:          features
   * @retries:           number of forced programming retries
- * @set_mode:          set mode function
+ * @set_mode:          legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_state_periodic:        switch state to periodic, if !set_mode
+ * @set_state_oneshot: switch state to oneshot, if !set_mode
+ * @set_state_shutdown:        switch state to shutdown, if !set_mode
+ * @tick_resume:       resume clkevt device, if !set_mode
   * @broadcast:         function to broadcast events
   * @min_delta_ticks:   minimum delta value in ticks stored for reconfiguration
   * @max_delta_ticks:   maximum delta value in ticks stored for reconfiguration
@@ -105,12 +128,24 @@ struct clock_event_device {
         u32                     mult;
         u32                     shift;
         enum clock_event_mode   mode;
+       enum clock_event_state  state;
         unsigned int            features;
         unsigned long           retries;
  
-       void                    (*broadcast)(const struct cpumask *mask);
+       /*
+        * State transition callback(s): Only one of the two groups should be
+        * defined:
+        * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+        * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+        */
         void                    (*set_mode)(enum clock_event_mode mode,
                                             struct clock_event_device *);
+       int                     (*set_state_periodic)(struct clock_event_device *);
+       int                     (*set_state_oneshot)(struct clock_event_device *);
+       int                     (*set_state_shutdown)(struct clock_event_device *);
+       int                     (*tick_resume)(struct clock_event_device *);
+
+       void                    (*broadcast)(const struct cpumask *mask);
         void                    (*suspend)(struct clock_event_device *);
         void                    (*resume)(struct clock_event_device *);
         unsigned long           min_delta_ticks;
@@ -160,8 +195,8 @@ extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
  
  extern void clockevents_exchange_device(struct clock_event_device *old,
                                         struct clock_event_device *new);
-extern void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode);
+extern void clockevents_set_state(struct clock_event_device *dev,
+                                 enum clock_event_state state);
  extern int clockevents_program_event(struct clock_event_device *dev,
                                      ktime_t expires, bool force);
  
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h

index 9c78d15d33e4de979b3d70d6a569064aa5529048..135509821c3994f4083bd2f0eb8f7c05409c026c 100644 (file)
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -56,6 +56,7 @@ struct module;
   * @shift:             cycle to nanosecond divisor (power of two)
   * @max_idle_ns:       max idle time permitted by the clocksource (nsecs)
   * @maxadj:            maximum adjustment value to mult (~11%)
+ * @max_cycles:                maximum safe cycle value which won't overflow on multiplication
   * @flags:             flags describing special properties
   * @archdata:          arch-specific data
   * @suspend:           suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
  #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
         struct arch_clocksource_data archdata;
  #endif
-
+       u64 max_cycles;
         const char *name;
         struct list_head list;
         int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
  }
  
  
-extern int clocksource_register(struct clocksource*);
  extern int clocksource_unregister(struct clocksource*);
  extern void clocksource_touch_watchdog(void);
  extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
  extern void clocksource_mark_unstable(struct clocksource *cs);
  
  extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
  extern void
  clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
  
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
  extern int
  __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
  extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+       return __clocksource_register_scale(cs, 1, 0);
+}
  
  static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
  {
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
         return __clocksource_register_scale(cs, 1000, khz);
  }
  
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
  {
-       __clocksource_updatefreq_scale(cs, 1, hz);
+       __clocksource_update_freq_scale(cs, 1, hz);
  }
  
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
  {
-       __clocksource_updatefreq_scale(cs, 1000, khz);
+       __clocksource_update_freq_scale(cs, 1000, khz);
  }
  
  
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h

index 05af9a3348934602456ec18b9cd29df84ac5a167..fb86963859c772846dfc531fc9cc8c0825f36ac7 100644 (file)
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -16,16 +16,16 @@
   * @read:      Read function of @clock
   * @mask:      Bitmask for two's complement subtraction of non 64bit clocks
   * @cycle_last: @clock cycle value at last update
- * @mult:      NTP adjusted multiplier for scaled math conversion
+ * @mult:      (NTP adjusted) multiplier for scaled math conversion
   * @shift:     Shift value for scaled math conversion
   * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono:  ktime_t (nanoseconds) base time for readout
+ * @base:      ktime_t (nanoseconds) base time for readout
   *
   * This struct has size 56 byte on 64 bit. Together with a seqcount it
   * occupies a single 64byte cache line.
   *
   * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for fast NMI safe accessors.
   */
  struct tk_read_base {
         struct clocksource      *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
         u32                     mult;
         u32                     shift;
         u64                     xtime_nsec;
-       ktime_t                 base_mono;
+       ktime_t                 base;
  };
  
  /**
   * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr:               The readout base structure
+ * @tkr_mono:          The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw:           The readout base structure for CLOCK_MONOTONIC_RAW
   * @xtime_sec:         Current CLOCK_REALTIME time in seconds
   * @ktime_sec:         Current CLOCK_MONOTONIC time in seconds
   * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
   * @offs_boot:         Offset clock monotonic -> clock boottime
   * @offs_tai:          Offset clock monotonic -> clock tai
   * @tai_offset:                The current UTC to TAI offset in seconds
- * @base_raw:          Monotonic raw base time in ktime_t format
   * @raw_time:          Monotonic raw base time in timespec64 format
   * @cycle_interval:    Number of clock cycles in one NTP interval
   * @xtime_interval:    Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
   * used instead.
   */
  struct timekeeper {
-       struct tk_read_base     tkr;
+       struct tk_read_base     tkr_mono;
+       struct tk_read_base     tkr_raw;
         u64                     xtime_sec;
         unsigned long           ktime_sec;
         struct timespec64       wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
         ktime_t                 offs_boot;
         ktime_t                 offs_tai;
         s32                     tai_offset;
-       ktime_t                 base_raw;
         struct timespec64       raw_time;
  
         /* The following members are for timekeeping internal use */
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h

index 3eaae47542751962579a3c6736f18917e4da7ad3..5047b83483d6675f5309dfbb04eb33c3d67ba9bc 100644 (file)
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
         return ktime_to_ns(ktime_get_boottime());
  }
  
+static inline u64 ktime_get_tai_ns(void)
+{
+       return ktime_to_ns(ktime_get_clocktai());
+}
+
  static inline u64 ktime_get_raw_ns(void)
  {
         return ktime_to_ns(ktime_get_raw());
  }
  
  extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
  
  /*
   * Timespec interfaces utilizing the ktime based ones
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c

index 55449909f11475372135ac61b33e65114eb151ba..73689df1e4b8e8abac15eabee75609110804199b 100644 (file)
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -94,25 +94,76 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
  }
  EXPORT_SYMBOL_GPL(clockevent_delta2ns);
  
+static int __clockevents_set_state(struct clock_event_device *dev,
+                                  enum clock_event_state state)
+{
+       /* Transition with legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* Legacy callback doesn't support new modes */
+               if (state > CLOCK_EVT_STATE_ONESHOT)
+                       return -ENOSYS;
+               /*
+                * 'clock_event_state' and 'clock_event_mode' have 1-to-1
+                * mapping until *_ONESHOT, and so a simple cast will work.
+                */
+               dev->set_mode((enum clock_event_mode)state, dev);
+               dev->mode = (enum clock_event_mode)state;
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* Transition with new state-specific callbacks */
+       switch (state) {
+       case CLOCK_EVT_STATE_DETACHED:
+               /*
+                * This is an internal state, which is guaranteed to go from
+                * SHUTDOWN to DETACHED. No driver interaction required.
+                */
+               return 0;
+
+       case CLOCK_EVT_STATE_SHUTDOWN:
+               return dev->set_state_shutdown(dev);
+
+       case CLOCK_EVT_STATE_PERIODIC:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+                       return -ENOSYS;
+               return dev->set_state_periodic(dev);
+
+       case CLOCK_EVT_STATE_ONESHOT:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+                       return -ENOSYS;
+               return dev->set_state_oneshot(dev);
+
+       default:
+               return -ENOSYS;
+       }
+}
+
  /**
- * clockevents_set_mode - set the operating mode of a clock event device
+ * clockevents_set_state - set the operating state of a clock event device
   * @dev:       device to modify
- * @mode:      new mode
+ * @state:     new state
   *
   * Must be called with interrupts disabled !
   */
-void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode)
+void clockevents_set_state(struct clock_event_device *dev,
+                          enum clock_event_state state)
  {
-       if (dev->mode != mode) {
-               dev->set_mode(mode, dev);
-               dev->mode = mode;
+       if (dev->state != state) {
+               if (__clockevents_set_state(dev, state))
+                       return;
+
+               dev->state = state;
  
                 /*
                  * A nsec2cyc multiplicator of 0 is invalid and we'd crash
                  * on it, so fix it up and emit a warning:
                  */
-               if (mode == CLOCK_EVT_MODE_ONESHOT) {
+               if (state == CLOCK_EVT_STATE_ONESHOT) {
                         if (unlikely(!dev->mult)) {
                                 dev->mult = 1;
                                 WARN_ON(1);
@@ -127,10 +178,28 @@ void clockevents_set_mode(struct clock_event_device *dev,
   */
  void clockevents_shutdown(struct clock_event_device *dev)
  {
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
         dev->next_event.tv64 = KTIME_MAX;
  }
  
+/**
+ * clockevents_tick_resume -   Resume the tick device before using it again
+ * @dev:                       device to resume
+ */
+int clockevents_tick_resume(struct clock_event_device *dev)
+{
+       int ret = 0;
+
+       if (dev->set_mode) {
+               dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
+               dev->mode = CLOCK_EVT_MODE_RESUME;
+       } else if (dev->tick_resume) {
+               ret = dev->tick_resume(dev);
+       }
+
+       return ret;
+}
+
  #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
  
  /* Limit min_delta to a jiffie */
@@ -183,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
                 delta = dev->min_delta_ns;
                 dev->next_event = ktime_add_ns(ktime_get(), delta);
  
-               if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+               if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                         return 0;
  
                 dev->retries++;
@@ -220,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
         delta = dev->min_delta_ns;
         dev->next_event = ktime_add_ns(ktime_get(), delta);
  
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                 return 0;
  
         dev->retries++;
@@ -252,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
  
         dev->next_event = expires;
  
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                 return 0;
  
         /* Shortcut for clockevent devices that can deal with ktime. */
@@ -297,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced)
         struct clock_event_device *dev, *newdev = NULL;
  
         list_for_each_entry(dev, &clockevent_devices, list) {
-               if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+               if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
                         continue;
  
                 if (!tick_check_replacement(newdev, dev))
@@ -323,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced)
  static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
  {
         /* Fast track. Device is unused */
-       if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+       if (ced->state == CLOCK_EVT_STATE_DETACHED) {
                 list_del_init(&ced->list);
                 return 0;
         }
@@ -373,6 +442,37 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
  }
  EXPORT_SYMBOL_GPL(clockevents_unbind);
  
+/* Sanity check of state transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+       /* Legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* We shouldn't be supporting new modes now */
+               WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
+                       dev->set_state_shutdown || dev->tick_resume);
+
+               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* New state-specific callbacks */
+       if (!dev->set_state_shutdown)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+           !dev->set_state_periodic)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+           !dev->set_state_oneshot)
+               return -EINVAL;
+
+       return 0;
+}
+
  /**
   * clockevents_register_device - register a clock event device
   * @dev:       device to register
@@ -381,7 +481,11 @@ void clockevents_register_device(struct clock_event_device *dev)
  {
         unsigned long flags;
  
-       BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+       BUG_ON(clockevents_sanity_check(dev));
+
+       /* Initialize state to DETACHED */
+       dev->state = CLOCK_EVT_STATE_DETACHED;
+
         if (!dev->cpumask) {
                 WARN_ON(num_possible_cpus() > 1);
                 dev->cpumask = cpumask_of(smp_processor_id());
@@ -445,11 +549,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
  {
         clockevents_config(dev, freq);
  
-       if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state == CLOCK_EVT_STATE_ONESHOT)
                 return clockevents_program_event(dev, dev->next_event, false);
  
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
-               dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
+               return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
  
         return 0;
  }
@@ -505,13 +609,13 @@ void clockevents_exchange_device(struct clock_event_device *old,
          */
         if (old) {
                 module_put(old->owner);
-               clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
+               clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
                 list_del(&old->list);
                 list_add(&old->list, &clockevents_released);
         }
  
         if (new) {
-               BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
+               BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
                 clockevents_shutdown(new);
         }
         local_irq_restore(flags);
@@ -597,7 +701,7 @@ int clockevents_notify(unsigned long reason, void *arg)
                         if (cpumask_test_cpu(cpu, dev->cpumask) &&
                             cpumask_weight(dev->cpumask) == 1 &&
                             !tick_is_broadcast_device(dev)) {
-                               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+                               BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
                                 list_del(&dev->list);
                         }
                 }
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c

index 4892352f0e4989c561c5d16ba3b27c063082a8f6..c3be3c71bbad58bb2ab61e18f8bb4924b36832f7 100644 (file)
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
                 schedule_work(&watchdog_work);
  }
  
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-       printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-              cs->name, delta);
-       __clocksource_unstable(cs);
-}
-
  /**
   * clocksource_mark_unstable - mark clocksource unstable via watchdog
   * @cs:                clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
  static void clocksource_watchdog(unsigned long data)
  {
         struct clocksource *cs;
-       cycle_t csnow, wdnow, delta;
+       cycle_t csnow, wdnow, cslast, wdlast, delta;
         int64_t wd_nsec, cs_nsec;
         int next_cpu, reset_pending;
  
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
  
                 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
                 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+               wdlast = cs->wd_last; /* save these in case we print them */
+               cslast = cs->cs_last;
                 cs->cs_last = csnow;
                 cs->wd_last = wdnow;
  
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
  
                 /* Check the deviation from the watchdog clocksource. */
                 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-                       clocksource_unstable(cs, cs_nsec - wd_nsec);
+                       pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+                       pr_warn("       '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+                               watchdog->name, wdnow, wdlast, watchdog->mask);
+                       pr_warn("       '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+                               cs->name, csnow, cslast, cs->mask);
+                       __clocksource_unstable(cs);
                         continue;
                 }
  
@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
   * @shift:     cycle to nanosecond divisor (power of two)
   * @maxadj:    maximum adjustment value to mult (~11%)
   * @mask:      bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:   maximum cycle value before potential overflow (does not include
+ *             any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, so that bad clock values
+ * can be detected.
   */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
  {
         u64 max_nsecs, max_cycles;
  
         /*
          * Calculate the maximum number of cycles that we can pass to the
-        * cyc2ns function without overflowing a 64-bit signed result. The
-        * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-        * which is equivalent to the below.
-        * max_cycles < (2^63)/(mult + maxadj)
-        * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-        * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-        * max_cycles < 2^(63 - log2(mult + maxadj))
-        * max_cycles < 1 << (63 - log2(mult + maxadj))
-        * Please note that we add 1 to the result of the log2 to account for
-        * any rounding errors, ensure the above inequality is satisfied and
-        * no overflow will occur.
+        * cyc2ns() function without overflowing a 64-bit result.
          */
-       max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+       max_cycles = ULLONG_MAX;
+       do_div(max_cycles, mult+maxadj);
  
         /*
          * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
         max_cycles = min(max_cycles, mask);
         max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
  
+       /* return the max_cycles value as well if requested */
+       if (max_cyc)
+               *max_cyc = max_cycles;
+
+       /* Return 50% of the actual maximum, so we can detect bad values */
+       max_nsecs >>= 1;
+
         return max_nsecs;
  }
  
  /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:         Pointer to clocksource to be updated
   *
   */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
  {
-       u64 max_nsecs;
-
-       max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-                                         cs->mask);
-       /*
-        * To ensure that the clocksource does not wrap whilst we are idle,
-        * limit the time the clocksource can be deferred by 12.5%. Please
-        * note a margin of 12.5% is used because this can be computed with
-        * a shift, versus say 10% which would require division.
-        */
-       return max_nsecs - (max_nsecs >> 3);
+       cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+                                               cs->maxadj, cs->mask,
+                                               &cs->max_cycles);
  }
  
  #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
  }
  
  /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used to update clocksource with new freq
   * @cs:                clocksource to be registered
   * @scale:     Scale factor multiplied against freq to get clocksource hz
   * @freq:      clocksource frequency (cycles per second) divided by scale
@@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
   * This should only be called from the clocksource->enable() method.
   *
   * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
   */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
  {
         u64 sec;
+
         /*
-        * Calc the maximum number of seconds which we can run before
-        * wrapping around. For clocksources which have a mask > 32bit
-        * we need to limit the max sleep time to have a good
-        * conversion precision. 10 minutes is still a reasonable
-        * amount. That results in a shift value of 24 for a
-        * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-        * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-        * margin as we do in clocksource_max_deferment()
+        * Default clocksources are *special* and self-define their mult/shift.
+        * But, you're not special, so you should specify a freq value.
          */
-       sec = (cs->mask - (cs->mask >> 3));
-       do_div(sec, freq);
-       do_div(sec, scale);
-       if (!sec)
-               sec = 1;
-       else if (sec > 600 && cs->mask > UINT_MAX)
-               sec = 600;
-
-       clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-                              NSEC_PER_SEC / scale, sec * scale);
-
+       if (freq) {
+               /*
+                * Calc the maximum number of seconds which we can run before
+                * wrapping around. For clocksources which have a mask > 32-bit
+                * we need to limit the max sleep time to have a good
+                * conversion precision. 10 minutes is still a reasonable
+                * amount. That results in a shift value of 24 for a
+                * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+                * ~ 0.06ppm granularity for NTP.
+                */
+               sec = cs->mask;
+               do_div(sec, freq);
+               do_div(sec, scale);
+               if (!sec)
+                       sec = 1;
+               else if (sec > 600 && cs->mask > UINT_MAX)
+                       sec = 600;
+
+               clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+                                      NSEC_PER_SEC / scale, sec * scale);
+       }
         /*
-        * for clocksources that have large mults, to avoid overflow.
-        * Since mult may be adjusted by ntp, add an safety extra margin
-        *
+        * Ensure clocksources that have large 'mult' values don't overflow
+        * when adjusted.
          */
         cs->maxadj = clocksource_max_adjustment(cs);
-       while ((cs->mult + cs->maxadj < cs->mult)
-               || (cs->mult - cs->maxadj > cs->mult)) {
+       while (freq && ((cs->mult + cs->maxadj < cs->mult)
+               || (cs->mult - cs->maxadj > cs->mult))) {
                 cs->mult >>= 1;
                 cs->shift--;
                 cs->maxadj = clocksource_max_adjustment(cs);
         }
  
-       cs->max_idle_ns = clocksource_max_deferment(cs);
+       /*
+        * Only warn for *special* clocksources that self-define
+        * their mult/shift values and don't specify a freq.
+        */
+       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+               "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+               cs->name);
+
+       clocksource_update_max_deferment(cs);
+
+       pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+                       cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
  }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
  
  /**
   * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
  {
  
         /* Initialize mult/shift and max_idle_ns */
-       __clocksource_updatefreq_scale(cs, scale, freq);
+       __clocksource_update_freq_scale(cs, scale, freq);
  
         /* Add clocksource to the clocksource list */
         mutex_lock(&clocksource_mutex);
@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
  }
  EXPORT_SYMBOL_GPL(__clocksource_register_scale);
  
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs:                clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-       /* calculate max adjustment for given mult/shift */
-       cs->maxadj = clocksource_max_adjustment(cs);
-       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-               "Clocksource %s might overflow on 11%% adjustment\n",
-               cs->name);
-
-       /* calculate max idle time permitted for this clocksource */
-       cs->max_idle_ns = clocksource_max_deferment(cs);
-
-       mutex_lock(&clocksource_mutex);
-       clocksource_enqueue(cs);
-       clocksource_enqueue_watchdog(cs);
-       clocksource_select();
-       mutex_unlock(&clocksource_mutex);
-       return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
  static void __clocksource_change_rating(struct clocksource *cs, int rating)
  {
         list_del(&cs->list);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c

index a6a5bf53e86d25575f90518399407a4fb65a85ed..c4bb518725b5f904fe01804095788026de3a31f1 100644 (file)
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
         .mask           = 0xffffffff, /*32bits*/
         .mult           = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
         .shift          = JIFFIES_SHIFT,
+       .max_cycles     = 10,
  };
  
  __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
  
  static int __init init_jiffies_clocksource(void)
  {
-       return clocksource_register(&clocksource_jiffies);
+       return __clocksource_register(&clocksource_jiffies);
  }
  
  core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
  
         refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
  
-       clocksource_register(&refined_jiffies);
+       __clocksource_register(&refined_jiffies);
         return 0;
  }
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c

index 01d2d15aa66233dc62db43f8e988a0f5519a729b..a26036d37a3895f163a20abdde5c6361d0110cf1 100644 (file)
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
  /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
  #include <linux/seqlock.h>
  #include <linux/bitops.h>
  
-struct clock_data {
-       ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:          sched_clock() value at last update
+ * @epoch_cyc:         Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ *                     clocks.
+ * @read_sched_clock:  Current clock source (or dummy source when suspended).
+ * @mult:              Multiplier for scaled math conversion.
+ * @shift:             Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
         u64 epoch_ns;
         u64 epoch_cyc;
-       seqcount_t seq;
-       unsigned long rate;
+       u64 sched_clock_mask;
+       u64 (*read_sched_clock)(void);
         u32 mult;
         u32 shift;
-       bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:               Sequence counter for protecting updates. The lowest
+ *                     bit is the index for @read_data.
+ * @read_data:         Data required to read from sched_clock.
+ * @wrap_kt:           Duration for which clock can run before wrapping.
+ * @rate:              Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+       seqcount_t              seq;
+       struct clock_read_data  read_data[2];
+       ktime_t                 wrap_kt;
+       unsigned long           rate;
+
+       u64 (*actual_read_sched_clock)(void);
  };
  
  static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
  
  core_param(irqtime, irqtime, int, 0400);
  
-static struct clock_data cd = {
-       .mult   = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
  static u64 notrace jiffy_sched_clock_read(void)
  {
         /*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
         return (u64)(jiffies - INITIAL_JIFFIES);
  }
  
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+       .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+                         .read_sched_clock = jiffy_sched_clock_read, },
+       .actual_read_sched_clock = jiffy_sched_clock_read,
+};
  
  static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
  {
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
  
  unsigned long long notrace sched_clock(void)
  {
-       u64 epoch_ns;
-       u64 epoch_cyc;
-       u64 cyc;
+       u64 cyc, res;
         unsigned long seq;
-
-       if (cd.suspended)
-               return cd.epoch_ns;
+       struct clock_read_data *rd;
  
         do {
-               seq = raw_read_seqcount_begin(&cd.seq);
-               epoch_cyc = cd.epoch_cyc;
-               epoch_ns = cd.epoch_ns;
+               seq = raw_read_seqcount(&cd.seq);
+               rd = cd.read_data + (seq & 1);
+
+               cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+                     rd->sched_clock_mask;
+               res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
         } while (read_seqcount_retry(&cd.seq, seq));
  
-       cyc = read_sched_clock();
-       cyc = (cyc - epoch_cyc) & sched_clock_mask;
-       return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+       return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+       /* update the backup (odd) copy with the new data */
+       cd.read_data[1] = *rd;
+
+       /* steer readers towards the odd copy */
+       raw_write_seqcount_latch(&cd.seq);
+
+       /* now it's safe for us to update the normal (even) copy */
+       cd.read_data[0] = *rd;
+
+       /* switch readers back to the even copy */
+       raw_write_seqcount_latch(&cd.seq);
  }
  
  /*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
   */
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
  {
-       unsigned long flags;
         u64 cyc;
         u64 ns;
+       struct clock_read_data rd;
+
+       rd = cd.read_data[0];
+
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+       rd.epoch_ns = ns;
+       rd.epoch_cyc = cyc;
  
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns +
-               cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
-
-       raw_local_irq_save(flags);
-       raw_write_seqcount_begin(&cd.seq);
-       cd.epoch_ns = ns;
-       cd.epoch_cyc = cyc;
-       raw_write_seqcount_end(&cd.seq);
-       raw_local_irq_restore(flags);
+       update_clock_read_data(&rd);
  }
  
  static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
  {
         update_sched_clock();
         hrtimer_forward_now(hrt, cd.wrap_kt);
+
         return HRTIMER_RESTART;
  }
  
-void __init sched_clock_register(u64 (*read)(void), int bits,
-                                unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
  {
         u64 res, wrap, new_mask, new_epoch, cyc, ns;
         u32 new_mult, new_shift;
-       ktime_t new_wrap_kt;
         unsigned long r;
         char r_unit;
+       struct clock_read_data rd;
  
         if (cd.rate > rate)
                 return;
  
         WARN_ON(!irqs_disabled());
  
-       /* calculate the mult/shift to convert counter ticks to ns. */
+       /* Calculate the mult/shift to convert counter ticks to ns. */
         clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
  
         new_mask = CLOCKSOURCE_MASK(bits);
+       cd.rate = rate;
+
+       /* Calculate how many nanosecs until we risk wrapping */
+       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+       cd.wrap_kt = ns_to_ktime(wrap);
  
-       /* calculate how many ns until we wrap */
-       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-       new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+       rd = cd.read_data[0];
  
-       /* update epoch for new counter and update epoch_ns from old counter*/
+       /* Update epoch for new counter and update 'epoch_ns' from old counter */
         new_epoch = read();
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+       cd.actual_read_sched_clock = read;
  
-       raw_write_seqcount_begin(&cd.seq);
-       read_sched_clock = read;
-       sched_clock_mask = new_mask;
-       cd.rate = rate;
-       cd.wrap_kt = new_wrap_kt;
-       cd.mult = new_mult;
-       cd.shift = new_shift;
-       cd.epoch_cyc = new_epoch;
-       cd.epoch_ns = ns;
-       raw_write_seqcount_end(&cd.seq);
+       rd.read_sched_clock     = read;
+       rd.sched_clock_mask     = new_mask;
+       rd.mult                 = new_mult;
+       rd.shift                = new_shift;
+       rd.epoch_cyc            = new_epoch;
+       rd.epoch_ns             = ns;
+
+       update_clock_read_data(&rd);
  
         r = rate;
         if (r >= 4000000) {
                 r /= 1000000;
                 r_unit = 'M';
-       } else if (r >= 1000) {
-               r /= 1000;
-               r_unit = 'k';
-       } else
-               r_unit = ' ';
-
-       /* calculate the ns resolution of this counter */
+       } else {
+               if (r >= 1000) {
+                       r /= 1000;
+                       r_unit = 'k';
+               } else {
+                       r_unit = ' ';
+               }
+       }
+
+       /* Calculate the ns resolution of this counter */
         res = cyc_to_ns(1ULL, new_mult, new_shift);
  
         pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
                 bits, r, r_unit, res, wrap);
  
-       /* Enable IRQ time accounting if we have a fast enough sched_clock */
+       /* Enable IRQ time accounting if we have a fast enough sched_clock() */
         if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
                 enable_sched_clock_irqtime();
  
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
  void __init sched_clock_postinit(void)
  {
         /*
-        * If no sched_clock function has been provided at that point,
+        * If no sched_clock() function has been provided at that point,
          * make it the final one one.
          */
-       if (read_sched_clock == jiffy_sched_clock_read)
+       if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
                 sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
  
         update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
         hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
  }
  
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+       unsigned long seq = raw_read_seqcount(&cd.seq);
+
+       return cd.read_data[seq & 1].epoch_cyc;
+}
+
  static int sched_clock_suspend(void)
  {
+       struct clock_read_data *rd = &cd.read_data[0];
+
         update_sched_clock();
         hrtimer_cancel(&sched_clock_timer);
-       cd.suspended = true;
+       rd->read_sched_clock = suspended_sched_clock_read;
+
         return 0;
  }
  
  static void sched_clock_resume(void)
  {
-       cd.epoch_cyc = read_sched_clock();
+       struct clock_read_data *rd = &cd.read_data[0];
+
+       rd->epoch_cyc = cd.actual_read_sched_clock();
         hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-       cd.suspended = false;
+       rd->read_sched_clock = cd.actual_read_sched_clock;
  }
  
  static struct syscore_ops sched_clock_ops = {
-       .suspend = sched_clock_suspend,
-       .resume = sched_clock_resume,
+       .suspend        = sched_clock_suspend,
+       .resume         = sched_clock_resume,
  };
  
  static int __init sched_clock_syscore_init(void)
  {
         register_syscore_ops(&sched_clock_ops);
+
         return 0;
  }
  device_initcall(sched_clock_syscore_init);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c

index 066f0ec05e487396315356df0ea04c8563ffa390..f0f8ee9dbc28d063e57dedfdc1fb780a5e8bcbd1 100644 (file)
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -303,7 +303,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
         /*
          * The device is in periodic mode. No reprogramming necessary:
          */
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
                 goto unlock;
  
         /*
@@ -464,7 +464,7 @@ int tick_resume_broadcast(void)
         bc = tick_broadcast_device.evtdev;
  
         if (bc) {
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+               clockevents_tick_resume(bc);
  
                 switch (tick_broadcast_device.mode) {
                 case TICKDEV_MODE_PERIODIC:
@@ -532,8 +532,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
  {
         int ret;
  
-       if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+       if (bc->state != CLOCK_EVT_STATE_ONESHOT)
+               clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
  
         ret = clockevents_program_event(bc, expires, force);
         if (!ret)
@@ -543,7 +543,7 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
  
  int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  {
-       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
         return 0;
  }
  
@@ -562,8 +562,8 @@ void tick_check_oneshot_broadcast_this_cpu(void)
                  * switched over, leave the device alone.
                  */
                 if (td->mode == TICKDEV_MODE_ONESHOT) {
-                       clockevents_set_mode(td->evtdev,
-                                            CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(td->evtdev,
+                                             CLOCK_EVT_STATE_ONESHOT);
                 }
         }
  }
@@ -666,7 +666,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
                 if (dev->next_event.tv64 < bc->next_event.tv64)
                         return;
         }
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
  }
  
  static void broadcast_move_bc(int deadcpu)
@@ -741,7 +741,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
                         cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
         } else {
                 if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
-                       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
                         /*
                          * The cpu which was handling the broadcast
                          * timer marked this cpu in the broadcast
@@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
  
         /* Set it up only once ! */
         if (bc->event_handler != tick_handle_oneshot_broadcast) {
-               int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
+               int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
  
                 bc->event_handler = tick_handle_oneshot_broadcast;
  
@@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
                            tick_broadcast_oneshot_mask, tmpmask);
  
                 if (was_periodic && !cpumask_empty(tmpmask)) {
-                       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
                         tick_broadcast_init_next_event(tmpmask,
                                                        tick_next_period);
                         tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c

index f7c515595b42b2bf9794a8f3f4ee1f9a2c17df89..a5b877130ae98a5c92747af2f7ea3c8659160fcd 100644 (file)
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
  
         tick_periodic(cpu);
  
-       if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state != CLOCK_EVT_STATE_ONESHOT)
                 return;
         for (;;) {
                 /*
@@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
  
         if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
             !tick_broadcast_oneshot_active()) {
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
         } else {
                 unsigned long seq;
                 ktime_t next;
@@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
                         next = tick_next_period;
                 } while (read_seqretry(&jiffies_lock, seq));
  
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
  
                 for (;;) {
                         if (!clockevents_program_event(dev, next, false))
@@ -365,6 +365,7 @@ void tick_shutdown(unsigned int *cpup)
                  * Prevent that the clock events layer tries to call
                  * the set mode function!
                  */
+               dev->state = CLOCK_EVT_STATE_DETACHED;
                 dev->mode = CLOCK_EVT_MODE_UNUSED;
                 clockevents_exchange_device(dev, NULL);
                 dev->event_handler = clockevents_handle_noop;
@@ -384,7 +385,7 @@ void tick_resume(void)
         struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
         int broadcast = tick_resume_broadcast();
  
-       clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
+       clockevents_tick_resume(td->evtdev);
  
         if (!broadcast) {
                 if (td->mode == TICKDEV_MODE_PERIODIC)
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h

index 366aeb4f2c6696ee6239e501ea4904f3812cd44c..98700e4a2000a3a1f6a658028b189a53bcee7b75 100644 (file)
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -32,6 +32,7 @@ extern bool tick_check_replacement(struct clock_event_device *curdev,
  extern void tick_install_replacement(struct clock_event_device *dev);
  
  extern void clockevents_shutdown(struct clock_event_device *dev);
+extern int clockevents_tick_resume(struct clock_event_device *dev);
  
  extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
  
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c

index 7ce740e78e1b506b155c07e3ac50a9a96e6b262d..67a64b1670bfdb984c7d9edec34f7eadd04800ec 100644 (file)
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -38,7 +38,7 @@ void tick_resume_oneshot(void)
  {
         struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
  
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
         clockevents_program_event(dev, ktime_get(), true);
  }
  
@@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
                         ktime_t next_event)
  {
         newdev->event_handler = handler;
-       clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
         clockevents_program_event(newdev, next_event, true);
  }
  
@@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
  
         td->mode = TICKDEV_MODE_ONESHOT;
         dev->event_handler = handler;
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
         tick_broadcast_switch_to_oneshot();
         return 0;
  }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c

index 91db94136c1062571ba0d0f1bfd1ed687770af3e..c3fcff06d30ac64bf72a24764c5dce9566a3ab19 100644 (file)
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -59,6 +59,7 @@ struct tk_fast {
  };
  
  static struct tk_fast tk_fast_mono ____cacheline_aligned;
+static struct tk_fast tk_fast_raw  ____cacheline_aligned;
  
  /* flag for if timekeeping is suspended */
  int __read_mostly timekeeping_suspended;
@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
  
  static inline void tk_normalize_xtime(struct timekeeper *tk)
  {
-       while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
-               tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
+       while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
+               tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
                 tk->xtime_sec++;
         }
  }
@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
         struct timespec64 ts;
  
         ts.tv_sec = tk->xtime_sec;
-       ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
         return ts;
  }
  
  static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
  {
         tk->xtime_sec = ts->tv_sec;
-       tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
  }
  
  static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
  {
         tk->xtime_sec += ts->tv_sec;
-       tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
         tk_normalize_xtime(tk);
  }
  
@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
         tk->offs_boot = ktime_add(tk->offs_boot, delta);
  }
  
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+#define WARNING_FREQ (HZ*300) /* 300 seconds of jiffies: 5 minute rate-limiting */
+/*
+ * These simple flag variables are set by timekeeping_get_delta()
+ * and cleared by timekeeping_check_update() without locks, which
+ * is racy, but ok since we don't really care about being super
+ * precise about how many events were seen, just that a problem
+ * was observed.
+ */
+static int timekeeping_underflow_seen;
+static int timekeeping_overflow_seen;
+
+/* jiffies stamp of the last warning; only modified under the timekeeping lock */
+static long timekeeping_last_warning;
+
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+       /* Debug check of @offset vs. max_cycles; also reports seen under/overflows. */
+       cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+       const char *name = tk->tkr_mono.clock->name;
+
+       if (offset > max_cycles) {
+               printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+                               offset, name, max_cycles);
+               printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+       } else {
+               if (offset > (max_cycles >> 1)) {
+                       printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
+                                       offset, name, max_cycles >> 1);
+                       printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+               }
+       }
+
+       if (timekeeping_underflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_underflow_seen = 0; /* cleared even if rate-limited */
+       }
+
+       if (timekeeping_overflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_overflow_seen = 0; /* cleared even if rate-limited */
+       }
+}
+
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       cycle_t now, last, mask, max, delta;
+       unsigned int seq;
+
+       /*
+        * Debug variant of the delta read. We're called holding a
+        * seqlock, so the data may shift under us mid-calculation and
+        * cause false positives (a problem noted on results that get
+        * thrown away). So nest another seqlock here to atomically
+        * grab the points we are checking with.
+        */
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               now = tkr->read(tkr->clock);
+               last = tkr->cycle_last;
+               mask = tkr->mask;
+               max = tkr->clock->max_cycles;
+       } while (read_seqcount_retry(&tk_core.seq, seq));
+
+       delta = clocksource_delta(now, last, mask);
+
+       /*
+        * Try to catch underflows by checking if we are seeing small
+        * mask-relative negative values.
+        */
+       if (unlikely((~delta & mask) < (mask >> 3))) {
+               timekeeping_underflow_seen = 1;
+               delta = 0;
+       }
+
+       /* Cap delta to the max_cycles snapshot taken above to avoid mult overflows */
+       if (unlikely(delta > max)) {
+               timekeeping_overflow_seen = 1;
+               delta = max;
+       }
+
+       return delta;
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{ /* Empty stub for the #else (non-CONFIG_DEBUG_TIMEKEEPING) build. */
+}
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       /* Sample the clocksource through the read hook cached in @tkr. */
+       const cycle_t sample = tkr->read(tkr->clock);
+
+       /*
+        * Cycles elapsed since the last update_wall_time, i.e. relative
+        * to tkr->cycle_last; unlike the CONFIG_DEBUG_TIMEKEEPING build,
+        * the result is returned without any sanity checks or capping.
+        */
+       return clocksource_delta(sample, tkr->cycle_last, tkr->mask);
+}
+#endif
+
  /**
   * tk_setup_internals - Set up internals to use clocksource clock.
   *
@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         u64 tmp, ntpinterval;
         struct clocksource *old_clock;
  
-       old_clock = tk->tkr.clock;
-       tk->tkr.clock = clock;
-       tk->tkr.read = clock->read;
-       tk->tkr.mask = clock->mask;
-       tk->tkr.cycle_last = tk->tkr.read(clock);
+       old_clock = tk->tkr_mono.clock;
+       tk->tkr_mono.clock = clock;
+       tk->tkr_mono.read = clock->read;
+       tk->tkr_mono.mask = clock->mask;
+       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+
+       tk->tkr_raw.clock = clock;
+       tk->tkr_raw.read = clock->read;
+       tk->tkr_raw.mask = clock->mask;
+       tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
  
         /* Do the ns -> cycle conversion first, using original mult */
         tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         if (old_clock) {
                 int shift_change = clock->shift - old_clock->shift;
                 if (shift_change < 0)
-                       tk->tkr.xtime_nsec >>= -shift_change;
+                       tk->tkr_mono.xtime_nsec >>= -shift_change;
                 else
-                       tk->tkr.xtime_nsec <<= shift_change;
+                       tk->tkr_mono.xtime_nsec <<= shift_change;
         }
-       tk->tkr.shift = clock->shift;
+       tk->tkr_raw.xtime_nsec = 0;
+
+       tk->tkr_mono.shift = clock->shift;
+       tk->tkr_raw.shift = clock->shift;
  
         tk->ntp_error = 0;
         tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
          * active clocksource. These value will be adjusted via NTP
          * to counteract clock drifting.
          */
-       tk->tkr.mult = clock->mult;
+       tk->tkr_mono.mult = clock->mult;
+       tk->tkr_raw.mult = clock->mult;
         tk->ntp_err_mult = 0;
  }
  
@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
  
  static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
  {
-       cycle_t cycle_now, delta;
+       cycle_t delta;
         s64 nsec;
  
-       /* read clocksource: */
-       cycle_now = tkr->read(tkr->clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+       delta = timekeeping_get_delta(tkr);
  
         nsec = delta * tkr->mult + tkr->xtime_nsec;
         nsec >>= tkr->shift;
@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
         return nsec + arch_gettimeoffset();
  }
  
-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
-{
-       struct clocksource *clock = tk->tkr.clock;
-       cycle_t cycle_now, delta;
-       s64 nsec;
-
-       /* read clocksource: */
-       cycle_now = tk->tkr.read(clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-
-       /* convert delta to nanoseconds. */
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
-
-       /* If arch requires, add in get_arch_timeoffset() */
-       return nsec + arch_gettimeoffset();
-}
-
  /**
   * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
   * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
   * slightly wrong timestamp (a few nanoseconds). See
   * @ktime_get_mono_fast_ns.
   */
-static void update_fast_timekeeper(struct tk_read_base *tkr)
+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
  {
-       struct tk_read_base *base = tk_fast_mono.base;
+       struct tk_read_base *base = tkf->base;
  
         /* Force readers off to base[1] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
  
         /* Update base[0] */
         memcpy(base, tkr, sizeof(*base));
  
         /* Force readers back to base[0] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
  
         /* Update base[1] */
         memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
   * of the following timestamps. Callers need to be aware of that and
   * deal with it.
   */
-u64 notrace ktime_get_mono_fast_ns(void)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
  {
         struct tk_read_base *tkr;
         unsigned int seq;
         u64 now;
  
         do {
-               seq = raw_read_seqcount(&tk_fast_mono.seq);
-               tkr = tk_fast_mono.base + (seq & 0x01);
-               now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+               seq = raw_read_seqcount(&tkf->seq);
+               tkr = tkf->base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+       } while (read_seqcount_retry(&tkf->seq, seq));
  
-       } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
         return now;
  }
+
+u64 notrace ktime_get_mono_fast_ns(void)
+{ /* notrace: the __always_inline helper takes its annotations from here */
+       return __ktime_get_fast_ns(&tk_fast_mono);
+}
  EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
  
+u64 notrace ktime_get_raw_fast_ns(void)
+{ /* notrace: the __always_inline helper takes its annotations from here */
+       return __ktime_get_fast_ns(&tk_fast_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+
  /* Suspend-time cycles value for halted fast timekeeper. */
  static cycle_t cycles_at_suspend;
  
@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
  static void halt_fast_timekeeper(struct timekeeper *tk)
  {
         static struct tk_read_base tkr_dummy;
-       struct tk_read_base *tkr = &tk->tkr;
+       struct tk_read_base *tkr = &tk->tkr_mono;
  
         memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
         cycles_at_suspend = tkr->read(tkr->clock);
         tkr_dummy.read = dummy_clock_read;
-       update_fast_timekeeper(&tkr_dummy);
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+
+       tkr = &tk->tkr_raw;
+       memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+       tkr_dummy.read = dummy_clock_read;
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
  }
  
  #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
  
         xt = timespec64_to_timespec(tk_xtime(tk));
         wm = timespec64_to_timespec(tk->wall_to_monotonic);
-       update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
-                           tk->tkr.cycle_last);
+       update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
+                           tk->tkr_mono.cycle_last);
  }
  
  static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
         * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
         * users are removed, this can be killed.
         */
-       remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
-       tk->tkr.xtime_nsec -= remainder;
-       tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
+       remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
+       tk->tkr_mono.xtime_nsec -= remainder;
+       tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
         tk->ntp_error += remainder << tk->ntp_error_shift;
-       tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
+       tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
  }
  #else
  #define old_vsyscall_fixup(tk)
@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
          */
         seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
         nsec = (u32) tk->wall_to_monotonic.tv_nsec;
-       tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+       tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
  
         /* Update the monotonic raw base */
-       tk->base_raw = timespec64_to_ktime(tk->raw_time);
+       tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
  
         /*
          * The sum of the nanoseconds portions of xtime and
          * wall_to_monotonic can be greater/equal one second. Take
          * this into account before updating tk->ktime_sec.
          */
-       nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
         if (nsec >= NSEC_PER_SEC)
                 seconds++;
         tk->ktime_sec = seconds;
@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
                 memcpy(&shadow_timekeeper, &tk_core.timekeeper,
                        sizeof(tk_core.timekeeper));
  
-       update_fast_timekeeper(&tk->tkr);
+       update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+       update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
  }
  
  /**
@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
   */
  static void timekeeping_forward_now(struct timekeeper *tk)
  {
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
         cycle_t cycle_now, delta;
         s64 nsec;
  
-       cycle_now = tk->tkr.read(clock);
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-       tk->tkr.cycle_last = cycle_now;
+       cycle_now = tk->tkr_mono.read(clock);
+       delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
  
-       tk->tkr.xtime_nsec += delta * tk->tkr.mult;
+       tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
  
         /* If arch requires, add in get_arch_timeoffset() */
-       tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
  
         tk_normalize_xtime(tk);
  
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
         timespec64_add_ns(&tk->raw_time, nsec);
  }
  
@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
                 seq = read_seqcount_begin(&tk_core.seq);
  
                 ts->tv_sec = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               base = ktime_add(tk->tkr.base_mono, *offset);
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = ktime_add(tk->tkr_mono.base, *offset);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->base_raw;
-               nsecs = timekeeping_get_ns_raw(tk);
+               base = tk->tkr_raw.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
                 ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(&tk->tkr);
+               nsec = timekeeping_get_ns(&tk->tkr_mono);
                 tomono = tk->wall_to_monotonic;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                 ts_real->tv_sec = tk->xtime_sec;
                 ts_real->tv_nsec = 0;
  
-               nsecs_raw = timekeeping_get_ns_raw(tk);
-               nsecs_real = timekeeping_get_ns(&tk->tkr);
+               nsecs_raw  = timekeeping_get_ns(&tk->tkr_raw);
+               nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
          */
         if (try_module_get(new->owner)) {
                 if (!new->enable || new->enable(new) == 0) {
-                       old = tk->tkr.clock;
+                       old = tk->tkr_mono.clock;
                         tk_setup_internals(tk, new);
                         if (old->disable)
                                 old->disable(old);
@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
  
-       if (tk->tkr.clock == clock)
+       if (tk->tkr_mono.clock == clock)
                 return 0;
         stop_machine(change_clocksource, clock, NULL);
         tick_clock_notify();
-       return tk->tkr.clock == clock ? 0 : -1;
+       return tk->tkr_mono.clock == clock ? 0 : -1;
  }
  
  /**
@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
  
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
-               nsecs = timekeeping_get_ns_raw(tk);
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
                 ts64 = tk->raw_time;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+               ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               ret = tk->tkr.clock->max_idle_ns;
+               ret = tk->tkr_mono.clock->max_idle_ns;
  
         } while (read_seqcount_retry(&tk_core.seq, seq));
  
@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
         tk_set_xtime(tk, &now);
         tk->raw_time.tv_sec = 0;
         tk->raw_time.tv_nsec = 0;
-       tk->base_raw.tv64 = 0;
         if (boot.tv_sec == 0 && boot.tv_nsec == 0)
                 boot = tk_xtime(tk);
  
@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
  void timekeeping_resume(void)
  {
         struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
         unsigned long flags;
         struct timespec64 ts_new, ts_delta;
         struct timespec tmp;
@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
          * The less preferred source will only be tried if there is no better
          * usable source. The rtc part is handled separately in rtc core code.
          */
-       cycle_now = tk->tkr.read(clock);
+       cycle_now = tk->tkr_mono.read(clock);
         if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > tk->tkr.cycle_last) {
+               cycle_now > tk->tkr_mono.cycle_last) {
                 u64 num, max = ULLONG_MAX;
                 u32 mult = clock->mult;
                 u32 shift = clock->shift;
                 s64 nsec = 0;
  
-               cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
-                                               tk->tkr.mask);
+               cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+                                               tk->tkr_mono.mask);
  
                 /*
                  * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
                 __timekeeping_inject_sleeptime(tk, &ts_delta);
  
         /* Re-base the last cycle value */
-       tk->tkr.cycle_last = cycle_now;
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
+
         tk->ntp_error = 0;
         timekeeping_suspended = 0;
         timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
          *
          * XXX - TODO: Doc ntp_error calculation.
          */
-       if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
+       if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
                 /* NTP adjustment caused clocksource mult overflow */
                 WARN_ON_ONCE(1);
                 return;
         }
  
-       tk->tkr.mult += mult_adj;
+       tk->tkr_mono.mult += mult_adj;
         tk->xtime_interval += interval;
-       tk->tkr.xtime_nsec -= offset;
+       tk->tkr_mono.xtime_nsec -= offset;
         tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
  }
  
@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
                 tk->ntp_err_mult = 0;
         }
  
-       if (unlikely(tk->tkr.clock->maxadj &&
-               (abs(tk->tkr.mult - tk->tkr.clock->mult)
-                       > tk->tkr.clock->maxadj))) {
+       if (unlikely(tk->tkr_mono.clock->maxadj &&
+               (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
+                       > tk->tkr_mono.clock->maxadj))) {
                 printk_once(KERN_WARNING
                         "Adjusting %s more than 11%% (%ld vs %ld)\n",
-                       tk->tkr.clock->name, (long)tk->tkr.mult,
-                       (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+                       tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
+                       (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
         }
  
         /*
@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
          * We'll correct this error next time through this function, when
          * xtime_nsec is not as small.
          */
-       if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
-               s64 neg = -(s64)tk->tkr.xtime_nsec;
-               tk->tkr.xtime_nsec = 0;
+       if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
+               s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
+               tk->tkr_mono.xtime_nsec = 0;
                 tk->ntp_error += neg << tk->ntp_error_shift;
         }
  }
@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
   */
  static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  {
-       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
+       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
         unsigned int clock_set = 0;
  
-       while (tk->tkr.xtime_nsec >= nsecps) {
+       while (tk->tkr_mono.xtime_nsec >= nsecps) {
                 int leap;
  
-               tk->tkr.xtime_nsec -= nsecps;
+               tk->tkr_mono.xtime_nsec -= nsecps;
                 tk->xtime_sec++;
  
                 /* Figure out if its a leap sec and apply if needed */
@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
  
         /* Accumulate one shifted interval */
         offset -= interval;
-       tk->tkr.cycle_last += interval;
+       tk->tkr_mono.cycle_last += interval;
+       tk->tkr_raw.cycle_last  += interval;
  
-       tk->tkr.xtime_nsec += tk->xtime_interval << shift;
+       tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
         *clock_set |= accumulate_nsecs_to_secs(tk);
  
         /* Accumulate raw time */
@@ -1622,14 +1740,17 @@ void update_wall_time(void)
  #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
         offset = real_tk->cycle_interval;
  #else
-       offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
-                                  tk->tkr.cycle_last, tk->tkr.mask);
+       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+                                  tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
  #endif
  
         /* Check if there's really nothing to do */
         if (offset < real_tk->cycle_interval)
                 goto out;
  
+       /* Do some additional sanity checking */
+       timekeeping_check_update(real_tk, offset);
+
         /*
          * With NO_HZ we may have to accumulate many cycle_intervals
          * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               base = tk->tkr.base_mono;
-               nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+               base = tk->tkr_mono.base;
+               nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
  
                 *offs_real = tk->offs_real;
                 *offs_boot = tk->offs_boot;
@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
         do {
                 seq = read_seqcount_begin(&tk_core.seq);
  
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
  
                 *offs_real = tk->offs_real;
                 *offs_boot = tk->offs_boot;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c

index 61ed862cdd376222dedfa317301f3d6c3dbd3404..05aa5590106aeecc1d0eb30238bd071dd06f03cf 100644 (file)
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
         print_name_offset(m, dev->set_next_event);
         SEQ_printf(m, "\n");
  
-       SEQ_printf(m, " set_mode:       ");
-       print_name_offset(m, dev->set_mode);
-       SEQ_printf(m, "\n");
+       if (dev->set_mode) {
+               SEQ_printf(m, " set_mode:       ");
+               print_name_offset(m, dev->set_mode);
+               SEQ_printf(m, "\n");
+       } else {
+               if (dev->set_state_shutdown) {
+                       SEQ_printf(m, " shutdown: ");
+                       print_name_offset(m, dev->set_state_shutdown);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_periodic) {
+                       SEQ_printf(m, " periodic: ");
+                       print_name_offset(m, dev->set_state_periodic);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_oneshot) {
+                       SEQ_printf(m, " oneshot:  ");
+                       print_name_offset(m, dev->set_state_oneshot);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->tick_resume) {
+                       SEQ_printf(m, " resume:   ");
+                       print_name_offset(m, dev->tick_resume);
+                       SEQ_printf(m, "\n");
+               }
+       }
  
         SEQ_printf(m, " event_handler:  ");
         print_name_offset(m, dev->event_handler);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index c5cefb3c009ce9cd51199dc5fef683d7bc9b1bdc..36b6fa88ce5b412f92b15da530772c5a058e5d12 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
           data corruption or a sporadic crash at a later stage once the region
           is examined. The runtime overhead introduced is minimal.
  
+config DEBUG_TIMEKEEPING
+       bool "Enable extra timekeeping sanity checking"
+       help
+         This option will enable additional timekeeping sanity checks
+         which may be helpful when diagnosing issues where timekeeping
+         problems are suspected.
+
+         This may include checks in the timekeeping hotpaths, so this
+         option may have a (very small) performance impact to some
+         workloads.
+
+         If unsure, say N.
+
  config TIMER_STATS
         bool "Collect kernel timers statistics"
         depends on DEBUG_KERNEL && PROC_FS
author	Ingo Molnar <mingo@kernel.org>
	Tue, 31 Mar 2015 07:08:13 +0000 (09:08 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Tue, 31 Mar 2015 07:08:13 +0000 (09:08 +0200)
arch/arm/common/bL_switcher.c		patch \| blob \| history
arch/arm/plat-omap/counter_32k.c		patch \| blob \| history
arch/arm64/kernel/vdso.c		patch \| blob \| history
arch/s390/kernel/time.c		patch \| blob \| history
arch/sparc/kernel/time_32.c		patch \| blob \| history
arch/tile/kernel/time.c		patch \| blob \| history
arch/x86/kernel/vsyscall_gtod.c		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
drivers/clocksource/em_sti.c		patch \| blob \| history
drivers/clocksource/sh_cmt.c		patch \| blob \| history
drivers/clocksource/sh_tmu.c		patch \| blob \| history
include/linux/clockchips.h		patch \| blob \| history
include/linux/clocksource.h		patch \| blob \| history
include/linux/timekeeper_internal.h		patch \| blob \| history
include/linux/timekeeping.h		patch \| blob \| history
kernel/time/clockevents.c		patch \| blob \| history
kernel/time/clocksource.c		patch \| blob \| history
kernel/time/jiffies.c		patch \| blob \| history
kernel/time/sched_clock.c		patch \| blob \| history
kernel/time/tick-broadcast.c		patch \| blob \| history
kernel/time/tick-common.c		patch \| blob \| history
kernel/time/tick-internal.h		patch \| blob \| history
kernel/time/tick-oneshot.c		patch \| blob \| history
kernel/time/timekeeping.c		patch \| blob \| history
kernel/time/timer_list.c		patch \| blob \| history
lib/Kconfig.debug		patch \| blob \| history