x86: vdso: pvclock gettime support
author     Marcelo Tosatti <mtosatti@redhat.com>
           Wed, 28 Nov 2012 01:28:57 +0000 (23:28 -0200)
committer  Marcelo Tosatti <mtosatti@redhat.com>
           Wed, 28 Nov 2012 01:29:11 +0000 (23:29 -0200)
Improve performance of time system calls when using Linux pvclock by
reading the time info from a fixmap-visible copy of the pvclock data.

Originally from Jeremy Fitzhardinge.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
arch/x86/include/asm/vsyscall.h
arch/x86/vdso/vclock_gettime.c
arch/x86/vdso/vgetcpu.c
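
For context (not part of the patch), a minimal userspace sketch of the calls this
fast path serves: with the series applied and the guest clocksource exposing
VCLOCK_PVCLOCK, the clock_gettime() call below is satisfied entirely in the vDSO,
falling back to the syscall only when the vclock mode resolves to VCLOCK_NONE.

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* Served by __vdso_clock_gettime() on the fast path. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("monotonic: %lld.%09ld\n",
		       (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}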

diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index eaea1d31f753092cd19ff6cf126f6c7c0f7cedb7..80f80955cfd8a781d25490f835add893bc0cf898 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -33,6 +33,26 @@ extern void map_vsyscall(void);
  */
 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
 
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+       unsigned int p;
+
+       if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
+               /* Load per CPU data from RDTSCP */
+               native_read_tscp(&p);
+       } else {
+               /* Load per CPU data from GDT */
+               asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+       }
+
+       return p;
+}
+#endif /* CONFIG_X86_64 */
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_VSYSCALL_H */
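
As an aside (illustrative, not from the patch): the value __getcpu() returns packs
the node number above the CPU number, assuming the kernel's (node << 12) | cpu
encoding, and is decoded with the same split __vdso_getcpu() uses further down.

#include <stdio.h>

#define VGETCPU_CPU_MASK 0xfff

int main(void)
{
	/* Pretend __getcpu() returned this: node 1, cpu 7. */
	unsigned int p = (1u << 12) | 7u;

	printf("cpu  = %u\n", p & VGETCPU_CPU_MASK);	/* low 12 bits */
	printf("node = %u\n", p >> 12);			/* upper bits  */
	return 0;
}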
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 4df6c373421a435dfeac34d68407ce10602d688d..205ad328aa52810975dd332771a16911f5e6935b 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,7 @@
 #include <asm/hpet.h>
 #include <asm/unistd.h>
 #include <asm/io.h>
+#include <asm/pvclock.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -62,6 +63,76 @@ static notrace cycle_t vread_hpet(void)
        return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
 }
 
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+{
+       const struct pvclock_vsyscall_time_info *pvti_base;
+       int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+       int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+
+       BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+
+       pvti_base = (struct pvclock_vsyscall_time_info *)
+                   __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+
+       return &pvti_base[offset];
+}
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+       const struct pvclock_vsyscall_time_info *pvti;
+       cycle_t ret;
+       u64 last;
+       u32 version;
+       u32 migrate_count;
+       u8 flags;
+       unsigned cpu, cpu1;
+
+
+       /*
+        * When looping to get a consistent (time-info, tsc) pair, we
+        * also need to deal with the possibility we can switch vcpus,
+        * so make sure we always re-fetch time-info for the current vcpu.
+        */
+       do {
+               cpu = __getcpu() & VGETCPU_CPU_MASK;
+               /* TODO: We can put vcpu id into higher bits of pvti.version.
+                * This will save a couple of cycles by getting rid of
+                * __getcpu() calls (Gleb).
+                */
+
+               pvti = get_pvti(cpu);
+
+               migrate_count = pvti->migrate_count;
+
+               version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+
+               /*
+                * Test we're still on the cpu as well as the version.
+                * We could have been migrated just after the first
+                * vgetcpu but before fetching the version, so we
+                * wouldn't notice a version change.
+                */
+               cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+       } while (unlikely(cpu != cpu1 ||
+                         (pvti->pvti.version & 1) ||
+                         pvti->pvti.version != version ||
+                         pvti->migrate_count != migrate_count));
+
+       if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+               *mode = VCLOCK_NONE;
+
+       /* refer to tsc.c read_tsc() comment for rationale */
+       last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+       if (likely(ret >= last))
+               return ret;
+
+       return last;
+}
+#endif
+
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
        long ret;
@@ -80,7 +151,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 }
 
 
-notrace static inline u64 vgetsns(void)
+notrace static inline u64 vgetsns(int *mode)
 {
        long v;
        cycles_t cycles;
@@ -88,6 +159,10 @@ notrace static inline u64 vgetsns(void)
                cycles = vread_tsc();
        else if (gtod->clock.vclock_mode == VCLOCK_HPET)
                cycles = vread_hpet();
+#ifdef CONFIG_PARAVIRT_CLOCK
+       else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+               cycles = vread_pvclock(mode);
+#endif
        else
                return 0;
        v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
@@ -107,7 +182,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
                mode = gtod->clock.vclock_mode;
                ts->tv_sec = gtod->wall_time_sec;
                ns = gtod->wall_time_snsec;
-               ns += vgetsns();
+               ns += vgetsns(&mode);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
@@ -127,7 +202,7 @@ notrace static int do_monotonic(struct timespec *ts)
                mode = gtod->clock.vclock_mode;
                ts->tv_sec = gtod->monotonic_time_sec;
                ns = gtod->monotonic_time_snsec;
-               ns += vgetsns();
+               ns += vgetsns(&mode);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
        timespec_add_ns(ts, ns);
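
A standalone sketch (illustrative constants, not the kernel's) of the get_pvti()
arithmetic above: per-vCPU pvclock_vsyscall_time_info entries are packed
PAGE_SIZE/PVTI_SIZE to a fixmap page, so a vCPU id splits into a fixmap page index
and a slot within that page. PVTI_SIZE = 64 is an assumption here.

#include <stdio.h>

#define PAGE_SIZE 4096
#define PVTI_SIZE 64	/* assumed sizeof(struct pvclock_vsyscall_time_info) */

int main(void)
{
	for (int cpu = 0; cpu < 128; cpu += 32) {
		int idx = cpu / (PAGE_SIZE / PVTI_SIZE);	/* fixmap page  */
		int offset = cpu % (PAGE_SIZE / PVTI_SIZE);	/* slot in page */

		printf("vcpu %3d -> page %d, slot %2d\n", cpu, idx, offset);
	}
	return 0;
}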
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 5463ad558573de5424d4d654e0cfdc20263930dd..2f94b039e55b6c2da8f40616af7445e0f8454a27 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
        unsigned int p;
 
-       if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-               /* Load per CPU data from RDTSCP */
-               native_read_tscp(&p);
-       } else {
-               /* Load per CPU data from GDT */
-               asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-       }
+       p = __getcpu();
+
        if (cpu)
-               *cpu = p & 0xfff;
+               *cpu = p & VGETCPU_CPU_MASK;
        if (node)
                *node = p >> 12;
        return 0;