powerpc: Implement accurate task and CPU time accounting
author Paul Mackerras <paulus@samba.org>
Thu, 23 Feb 2006 23:06:59 +0000 (10:06 +1100)
committer Paul Mackerras <paulus@samba.org>
Fri, 24 Feb 2006 03:05:56 +0000 (14:05 +1100)
This implements accurate task and cpu time accounting for 64-bit
powerpc kernels.  Instead of accounting a whole jiffy of time to a
task on a timer interrupt because that task happened to be running at
the time, we now account time in units of timebase ticks according to
the actual time spent by the task in user mode and kernel mode.  We
also count the time spent processing hardware and software interrupts
accurately.  This is conditional on CONFIG_VIRT_CPU_ACCOUNTING.  If
that is not set, we do tick-based approximate accounting as before.

To get this accurate information, we read either the PURR (processor
utilization of resources register) on POWER5 machines, or the timebase
on other machines, at each of the following points:

* each entry to the kernel from usermode
* each exit to usermode
* transitions between process context, hard irq context and soft irq
  context in kernel mode
* context switches.

On POWER5 systems with shared-processor logical partitioning we also
read both the PURR and the timebase at each timer interrupt and
context switch in order to determine how much time has been taken by
the hypervisor to run other partitions ("steal" time).  Unfortunately,
since we need values of the PURR on both threads at the same time to
accurately calculate the steal time, and since we can only calculate
steal time on a per-core basis, the apportioning of the steal time
between idle time (time which we ceded to the hypervisor in the idle
loop) and actual stolen time is somewhat approximate at the moment.

This is all based quite heavily on what s390 does, and it uses the
generic interfaces that were added by the s390 developers,
i.e. account_system_time(), account_user_time(), etc.

This patch doesn't add any new interfaces between the kernel and
userspace, and doesn't change the units in which time is reported to
userspace by things such as /proc/stat, /proc/<pid>/stat, getrusage(),
times(), etc.  Internally the various task and cpu times are stored in
timebase units, but they are converted to USER_HZ units (1/100th of a
second) when reported to userspace.  Some precision is therefore lost
but there should not be any accumulating error, since the internal
accumulation is at full precision.

Signed-off-by: Paul Mackerras <paulus@samba.org>
16 files changed:
arch/powerpc/Kconfig
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/time.c
include/asm-powerpc/cputable.h
include/asm-powerpc/cputime.h
include/asm-powerpc/irq.h
include/asm-powerpc/paca.h
include/asm-powerpc/ppc_asm.h
include/asm-powerpc/system.h
include/asm-powerpc/time.h
include/asm-ppc/time.h

index fb0dcb994b84a62bdcac7963f8f8ed90559801c1..d112aed2999bffd9e8e16045fc8b950df0f35713 100644 (file)
@@ -250,6 +250,21 @@ config PPC_STD_MMU_32
        def_bool y
        depends on PPC_STD_MMU && PPC32
 
+config VIRT_CPU_ACCOUNTING
+       bool "Deterministic task and CPU time accounting"
+       depends on PPC64
+       default y
+       help
+         Select this option to enable more accurate task and CPU time
+         accounting.  This is done by reading a CPU counter on each
+         kernel entry and exit and on transitions within the kernel
+         between system, softirq and hardirq state, so there is a
+         small performance impact.  This also enables accounting of
+         stolen time on logically-partitioned systems running on
+         IBM POWER5-based machines.
+
+         If in doubt, say Y here.
+
 config SMP
        depends on PPC_STD_MMU
        bool "Symmetric multi-processing support"
index 840aad43a98bd218f5c985214e483b5578d6b763..18810ac55bcce80f1e5d4854dcecdda5a1fd96d9 100644 (file)
@@ -137,6 +137,9 @@ int main(void)
        DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
        DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
        DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
+       DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
+       DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
+       DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
 
        DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
        DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
index 79a0c910f0d8733aac8cf38052147f3676ac4411..8f606c1889fa1e426abb7eff5255db72c1f6a992 100644 (file)
@@ -61,6 +61,7 @@ system_call_common:
        std     r12,_MSR(r1)
        std     r0,GPR0(r1)
        std     r10,GPR1(r1)
+       ACCOUNT_CPU_USER_ENTRY(r10, r11)
        std     r2,GPR2(r1)
        std     r3,GPR3(r1)
        std     r4,GPR4(r1)
@@ -168,8 +169,9 @@ syscall_error_cont:
        stdcx.  r0,0,r1                 /* to clear the reservation */
        andi.   r6,r8,MSR_PR
        ld      r4,_LINK(r1)
-       beq-    1f                      /* only restore r13 if */
-       ld      r13,GPR13(r1)           /* returning to usermode */
+       beq-    1f
+       ACCOUNT_CPU_USER_EXIT(r11, r12)
+       ld      r13,GPR13(r1)   /* only restore r13 if returning to usermode */
 1:     ld      r2,GPR2(r1)
        li      r12,MSR_RI
        andc    r11,r10,r12
@@ -536,6 +538,7 @@ restore:
         * userspace
         */
        beq     1f
+       ACCOUNT_CPU_USER_EXIT(r3, r4)
        REST_GPR(13, r1)
 1:
        ld      r3,_CTR(r1)
index 2b21ec4992854a6205199cbc0ba14cfa220ec0eb..be3ae7733577475064fdb2168ff14c5a13cbc83a 100644 (file)
@@ -277,6 +277,7 @@ exception_marker:
        std     r10,0(r1);              /* make stack chain pointer     */ \
        std     r0,GPR0(r1);            /* save r0 in stackframe        */ \
        std     r10,GPR1(r1);           /* save r1 in stackframe        */ \
+       ACCOUNT_CPU_USER_ENTRY(r9, r10);                                   \
        std     r2,GPR2(r1);            /* save r2 in stackframe        */ \
        SAVE_4GPRS(3, r1);              /* save r3 - r6 in stackframe   */ \
        SAVE_2GPRS(7, r1);              /* save r7, r8 in stackframe    */ \
@@ -844,6 +845,14 @@ fast_exception_return:
        ld      r11,_NIP(r1)
        andi.   r3,r12,MSR_RI           /* check if RI is set */
        beq-    unrecov_fer
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+       andi.   r3,r12,MSR_PR
+       beq     2f
+       ACCOUNT_CPU_USER_EXIT(r3, r4)
+2:
+#endif
+
        ld      r3,_CCR(r1)
        ld      r4,_LINK(r1)
        ld      r5,_CTR(r1)
index edb2b00edbd21a93ca1d4d49580cf3b1df40c24e..24dc8117b822e5228098438711c2b1f6ae420025 100644 (file)
@@ -369,6 +369,7 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
        return NO_IRQ;
 
 }
+#endif /* CONFIG_PPC64 */
 
 #ifdef CONFIG_IRQSTACKS
 struct thread_info *softirq_ctx[NR_CPUS];
@@ -392,10 +393,24 @@ void irq_ctx_init(void)
        }
 }
 
+static inline void do_softirq_onstack(void)    /* hand softirq processing to call_do_softirq() on this cpu's softirq_ctx */
+{
+       struct thread_info *curtp, *irqtp;
+
+       curtp = current_thread_info();
+       irqtp = softirq_ctx[smp_processor_id()];        /* softirq thread_info belonging to this cpu */
+       irqtp->task = curtp->task;      /* lend the current task to the softirq context */
+       call_do_softirq(irqtp);
+       irqtp->task = NULL;             /* detach the task again once softirqs are done */
+}
+
+#else
+#define do_softirq_onstack()   __do_softirq()
+#endif /* CONFIG_IRQSTACKS */
+
 void do_softirq(void)
 {
        unsigned long flags;
-       struct thread_info *curtp, *irqtp;
 
        if (in_interrupt())
                return;
@@ -403,19 +418,18 @@ void do_softirq(void)
        local_irq_save(flags);
 
        if (local_softirq_pending()) {
-               curtp = current_thread_info();
-               irqtp = softirq_ctx[smp_processor_id()];
-               irqtp->task = curtp->task;
-               call_do_softirq(irqtp);
-               irqtp->task = NULL;
+               account_system_vtime(current);
+               local_bh_disable();
+               do_softirq_onstack();
+               account_system_vtime(current);
+               __local_bh_enable();
        }
 
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
 
-#endif /* CONFIG_IRQSTACKS */
-
+#ifdef CONFIG_PPC64
 static int __init setup_noirqdistrib(char *str)
 {
        distribute_irqs = 0;
index dd774c3c9302450635d41b84e5298270b75fd77b..1770a066c2176f71afa3e736904d8744bf0e0b84 100644 (file)
@@ -45,9 +45,9 @@
 #include <asm/mmu.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
+#include <asm/time.h>
 #ifdef CONFIG_PPC64
 #include <asm/firmware.h>
-#include <asm/time.h>
 #endif
 
 extern unsigned long _get_SP(void);
@@ -328,6 +328,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #endif
 
        local_irq_save(flags);
+
+       account_system_vtime(current);
+       account_process_vtime(current);
+       calculate_steal_time();
+
        last = _switch(old_thread, new_thread);
 
        local_irq_restore(flags);
index 13595a64f013a2c1b31f2ce0d7cf86be2b1c3ae9..805eaedbc3084e3ea1c22d594705c6acb24fb514 100644 (file)
@@ -541,7 +541,7 @@ int __devinit start_secondary(void *unused)
                smp_ops->take_timebase();
 
        if (system_state > SYSTEM_BOOTING)
-               per_cpu(last_jiffy, cpu) = get_tb();
+               snapshot_timebase();
 
        spin_lock(&call_lock);
        cpu_set(cpu, cpu_online_map);
@@ -573,6 +573,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
        set_cpus_allowed(current, old_mask);
 
+       snapshot_timebases();
+
        dump_numa_cpu_topology();
 }
 
index 2a7ddc5793797ecb9d2146e415dee6740e815d49..0b34db28916fd1fc038aaed7194f692f9fdd7172 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/percpu.h>
 #include <linux/rtc.h>
 #include <linux/jiffies.h>
+#include <linux/posix-timers.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -135,6 +136,220 @@ unsigned long tb_last_stamp;
  */
 DEFINE_PER_CPU(unsigned long, last_jiffy);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Factors for converting from cputime_t (timebase ticks) to
+ * jiffies, milliseconds, seconds, and clock_t (1/USER_HZ seconds).
+ * These are all stored as 0.64 fixed-point binary fractions.
+ */
+u64 __cputime_jiffies_factor;
+u64 __cputime_msec_factor;
+u64 __cputime_sec_factor;
+u64 __cputime_clockt_factor;
+
+static void calc_cputime_factors(void) /* (re)derive all four factors from tb_ticks_per_sec */
+{
+       struct div_result res;
+
+       div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
+       __cputime_jiffies_factor = res.result_low;      /* timebase ticks -> jiffies */
+       div128_by_32(1000, 0, tb_ticks_per_sec, &res);
+       __cputime_msec_factor = res.result_low;         /* timebase ticks -> milliseconds */
+       div128_by_32(1, 0, tb_ticks_per_sec, &res);
+       __cputime_sec_factor = res.result_low;          /* timebase ticks -> seconds */
+       div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
+       __cputime_clockt_factor = res.result_low;       /* timebase ticks -> clock_t (1/USER_HZ s) */
+}
+
+/*
+ * Read the PURR on systems that have it (CPU_FTR_PURR), otherwise the timebase.
+ */
+static u64 read_purr(void)
+{
+       if (cpu_has_feature(CPU_FTR_PURR))
+               return mfspr(SPRN_PURR);
+       return mftb();          /* no PURR on this cpu: fall back to the timebase */
+}
+
+/*
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+       u64 now, delta;
+       unsigned long flags;
+
+       local_irq_save(flags);          /* snapshot read and update below run with local irqs off */
+       now = read_purr();
+       delta = now - get_paca()->startpurr;    /* ticks since the previous snapshot */
+       get_paca()->startpurr = now;            /* start the next interval from here */
+       if (!in_interrupt()) {          /* not in irq context: also flush the paca's system_time */
+               delta += get_paca()->system_time;
+               get_paca()->system_time = 0;
+       }
+       account_system_time(tsk, 0, delta);
+       local_irq_restore(flags);
+}
+
+/*
+ * Transfer the user time accumulated in the paca by the exception
+ * entry code to the generic process user time record; accumulated
+ * system time is handed over separately by account_system_vtime().
+ * Must be called with interrupts disabled.
+ */
+void account_process_vtime(struct task_struct *tsk)
+{
+       cputime_t utime;
+
+       utime = get_paca()->user_time;
+       get_paca()->user_time = 0;      /* reset the accumulator once its value is taken */
+       account_user_time(tsk, utime);
+}
+
+static void account_process_time(struct pt_regs *regs) /* timer_interrupt() uses this in place of update_process_times() */
+{
+       int cpu = smp_processor_id();
+
+       account_process_vtime(current);         /* flush the paca's user_time to current */
+       run_local_timers();
+       if (rcu_pending(cpu))
+               rcu_check_callbacks(cpu, user_mode(regs));
+       scheduler_tick();
+       run_posix_cpu_timers(current);
+}
+
+#ifdef CONFIG_PPC_SPLPAR
+/*
+ * Stuff for accounting stolen time.
+ */
+struct cpu_purr_data {
+       int     initialized;                    /* set once this cpu's snapshot has been taken */
+       u64     tb0;                    /* timebase at origin time */
+       u64     purr0;                  /* PURR at origin time */
+       u64     tb;                     /* last TB value read */
+       u64     purr;                   /* last PURR value read, relative to purr0 */
+       u64     stolen;                 /* stolen time accounted so far; kept in the (cpu & ~1) entry */
+       spinlock_t lock;                /* only the (cpu & ~1) entry's lock is taken */
+};
+
+static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data);
+
+static void snapshot_tb_and_purr(void *data)   /* 'data' is unused; run on each cpu by snapshot_timebases() */
+{
+       struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data);
+
+       p->tb0 = mftb();
+       p->purr0 = mfspr(SPRN_PURR);
+       p->tb = p->tb0;
+       p->purr = 0;            /* purr is kept relative to purr0, so it starts at zero */
+       wmb();                  /* order the stores above before the initialized flag */
+       p->initialized = 1;
+}
+
+/*
+ * Called during boot when all cpus have come up.
+ */
+void snapshot_timebases(void)
+{
+       int cpu;
+
+       if (!cpu_has_feature(CPU_FTR_PURR))
+               return;         /* nothing to snapshot without a PURR */
+       for_each_cpu(cpu)       /* initialise every lock before any snapshot is taken */
+               spin_lock_init(&per_cpu(cpu_purr_data, cpu).lock);
+       on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+}
+
+void calculate_steal_time(void)        /* called from timer_interrupt() and __switch_to() */
+{
+       u64 tb, purr, t0;
+       s64 stolen;
+       struct cpu_purr_data *p0, *pme, *phim;
+       int cpu;
+
+       if (!cpu_has_feature(CPU_FTR_PURR))
+               return;
+       cpu = smp_processor_id();
+       pme = &per_cpu(cpu_purr_data, cpu);
+       if (!pme->initialized)
+               return;         /* this can happen in early boot */
+       p0 = &per_cpu(cpu_purr_data, cpu & ~1);         /* even-numbered cpu of the pair: owns lock and stolen total */
+       phim = &per_cpu(cpu_purr_data, cpu ^ 1);        /* the other cpu of the pair */
+       spin_lock(&p0->lock);
+       tb = mftb();
+       purr = mfspr(SPRN_PURR) - pme->purr0;           /* PURR relative to this cpu's origin */
+       if (!phim->initialized || !cpu_online(cpu ^ 1)) {       /* no usable sibling data: use our own deltas only */
+               stolen = (tb - pme->tb) - (purr - pme->purr);
+       } else {
+               t0 = pme->tb0;          /* t0 = the earlier of the two origin timebases */
+               if (phim->tb0 < t0)
+                       t0 = phim->tb0;
+               stolen = phim->tb - t0 - phim->purr - purr - p0->stolen;
+       }
+       if (stolen > 0) {               /* only positive amounts are ever accounted */
+               account_steal_time(current, stolen);
+               p0->stolen += stolen;
+       }
+       pme->tb = tb;
+       pme->purr = purr;
+       spin_unlock(&p0->lock);
+}
+
+/*
+ * Must be called before the cpu is added to the online map when
+ * a cpu is being brought up at runtime.
+ */
+static void snapshot_purr(void)
+{
+       int cpu;
+       u64 purr;
+       struct cpu_purr_data *p0, *pme, *phim;
+       unsigned long flags;
+
+       if (!cpu_has_feature(CPU_FTR_PURR))
+               return;
+       cpu = smp_processor_id();
+       pme = &per_cpu(cpu_purr_data, cpu);
+       p0 = &per_cpu(cpu_purr_data, cpu & ~1);         /* even-numbered cpu of the pair: owns the lock */
+       phim = &per_cpu(cpu_purr_data, cpu ^ 1);        /* the other cpu of the pair */
+       spin_lock_irqsave(&p0->lock, flags);
+       pme->tb = pme->tb0 = mftb();
+       purr = mfspr(SPRN_PURR);
+       if (!phim->initialized) {
+               pme->purr = 0;
+               pme->purr0 = purr;
+       } else {
+               /* set pme->purr and pme->purr0 so that bringing this cpu up leaves p0->stolen unchanged */
+               pme->purr = phim->tb - phim->tb0 - phim->purr - p0->stolen;
+               pme->purr0 = purr - pme->purr;
+       }
+       pme->initialized = 1;
+       spin_unlock_irqrestore(&p0->lock, flags);
+}
+
+#endif /* CONFIG_PPC_SPLPAR */
+
+#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
+#define calc_cputime_factors()
+#define account_process_time(regs)     update_process_times(user_mode(regs))
+#define calculate_steal_time()         do { } while (0)
+#endif
+
+#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR))
+#define snapshot_purr()                        do { } while (0)
+#endif
+
+/*
+ * Called when a cpu comes up after the system has finished booting,
+ * i.e. as a result of a hotplug cpu action.
+ */
+void snapshot_timebase(void)
+{
+       __get_cpu_var(last_jiffy) = get_tb();
+       snapshot_purr();        /* empty unless both VIRT_CPU_ACCOUNTING and PPC_SPLPAR are configured */
+}
+
 void __delay(unsigned long loops)
 {
        unsigned long start;
@@ -382,6 +597,7 @@ static void iSeries_tb_recal(void)
                                                new_tb_ticks_per_jiffy, sign, tick_diff );
                                tb_ticks_per_jiffy = new_tb_ticks_per_jiffy;
                                tb_ticks_per_sec   = new_tb_ticks_per_sec;
+                               calc_cputime_factors();
                                div128_by_32( XSEC_PER_SEC, 0, tb_ticks_per_sec, &divres );
                                do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
                                tb_to_xs = divres.result_low;
@@ -430,6 +646,7 @@ void timer_interrupt(struct pt_regs * regs)
        irq_enter();
 
        profile_tick(CPU_PROFILING, regs);
+       calculate_steal_time();
 
 #ifdef CONFIG_PPC_ISERIES
        get_lppaca()->int_dword.fields.decr_int = 0;
@@ -451,7 +668,7 @@ void timer_interrupt(struct pt_regs * regs)
                 * is the case.
                 */
                if (!cpu_is_offline(cpu))
-                       update_process_times(user_mode(regs));
+                       account_process_time(regs);
 
                /*
                 * No need to check whether cpu is offline here; boot_cpuid
@@ -508,13 +725,27 @@ void wakeup_decrementer(void)
 void __init smp_space_timers(unsigned int max_cpus)
 {
        int i;
+       unsigned long half = tb_ticks_per_jiffy / 2;
        unsigned long offset = tb_ticks_per_jiffy / max_cpus;
        unsigned long previous_tb = per_cpu(last_jiffy, boot_cpuid);
 
        /* make sure tb > per_cpu(last_jiffy, cpu) for all cpus always */
        previous_tb -= tb_ticks_per_jiffy;
+       /*
+        * The stolen time calculation for POWER5 shared-processor LPAR
+        * systems works better if the two threads' timebase interrupts
+        * are staggered by half a jiffy with respect to each other.
+        */
        for_each_cpu(i) {
-               if (i != boot_cpuid) {
+               if (i == boot_cpuid)
+                       continue;
+               if (i == (boot_cpuid ^ 1))
+                       per_cpu(last_jiffy, i) =
+                               per_cpu(last_jiffy, boot_cpuid) - half;
+               else if (i & 1)
+                       per_cpu(last_jiffy, i) =
+                               per_cpu(last_jiffy, i ^ 1) + half;
+               else {
                        previous_tb += offset;
                        per_cpu(last_jiffy, i) = previous_tb;
                }
@@ -706,6 +937,7 @@ void __init time_init(void)
        tb_ticks_per_sec = ppc_tb_freq;
        tb_ticks_per_usec = ppc_tb_freq / 1000000;
        tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000);
+       calc_cputime_factors();
 
        /*
         * Calculate the length of each tick in ns.  It will not be
index 90d005bb4d1cf976b6b28d61464bfe7750515096..99d12ff6346c5c8cec2280bce6e433f92ed5597d 100644 (file)
@@ -117,6 +117,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 #define CPU_FTR_MMCRA_SIHV             ASM_CONST(0x0000080000000000)
 #define CPU_FTR_CI_LARGE_PAGE          ASM_CONST(0x0000100000000000)
 #define CPU_FTR_PAUSE_ZERO             ASM_CONST(0x0000200000000000)
+#define CPU_FTR_PURR                   ASM_CONST(0x0000400000000000)
 #else
 /* ensure on 32b processors the flags are available for compiling but
  * don't do anything */
@@ -132,6 +133,7 @@ extern void do_cpu_ftr_fixups(unsigned long offset);
 #define CPU_FTR_LOCKLESS_TLBIE         ASM_CONST(0x0)
 #define CPU_FTR_MMCRA_SIHV             ASM_CONST(0x0)
 #define CPU_FTR_CI_LARGE_PAGE          ASM_CONST(0x0)
+#define CPU_FTR_PURR                   ASM_CONST(0x0)
 #endif
 
 #ifndef __ASSEMBLY__
@@ -316,7 +318,7 @@ enum {
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 |
            CPU_FTR_MMCRA | CPU_FTR_SMT |
            CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE |
-           CPU_FTR_MMCRA_SIHV,
+           CPU_FTR_MMCRA_SIHV | CPU_FTR_PURR,
        CPU_FTRS_CELL = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
            CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 |
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT |
index 6d68ad7e0ea3f41f0f5eb4eaf9895b77e87c659d..a21185d478830365eaa8fc2d8085f31c76f35ad7 100644 (file)
@@ -1 +1,203 @@
+/*
+ * Definitions for measuring cputime on powerpc machines.
+ *
+ * Copyright (C) 2006 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * If we have CONFIG_VIRT_CPU_ACCOUNTING, we measure cpu time in
+ * the same units as the timebase.  Otherwise we measure cpu time
+ * in jiffies using the generic definitions.
+ */
+
+#ifndef __POWERPC_CPUTIME_H
+#define __POWERPC_CPUTIME_H
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
 #include <asm-generic/cputime.h>
+#else
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+#include <asm/time.h>
+#include <asm/param.h>
+
+typedef u64 cputime_t;
+typedef u64 cputime64_t;
+
+#define cputime_zero                   ((cputime_t)0)
+#define cputime_max                    ((~((cputime_t)0) >> 1) - 1)
+#define cputime_add(__a, __b)          ((__a) +  (__b))
+#define cputime_sub(__a, __b)          ((__a) -  (__b))
+#define cputime_div(__a, __n)          ((__a) /  (__n))
+#define cputime_halve(__a)             ((__a) >> 1)
+#define cputime_eq(__a, __b)           ((__a) == (__b))
+#define cputime_gt(__a, __b)           ((__a) >  (__b))
+#define cputime_ge(__a, __b)           ((__a) >= (__b))
+#define cputime_lt(__a, __b)           ((__a) <  (__b))
+#define cputime_le(__a, __b)           ((__a) <= (__b))
+
+#define cputime64_zero                 ((cputime64_t)0)
+#define cputime64_add(__a, __b)                ((__a) + (__b))
+#define cputime_to_cputime64(__ct)     (__ct)
+
+#ifdef __KERNEL__
+
+/*
+ * Convert cputime <-> jiffies
+ */
+extern u64 __cputime_jiffies_factor;
+
+static inline unsigned long cputime_to_jiffies(const cputime_t ct)
+{
+       return mulhdu(ct, __cputime_jiffies_factor);    /* scale timebase ticks by the jiffies factor */
+}
+
+static inline cputime_t jiffies_to_cputime(const unsigned long jif)
+{
+       cputime_t ct;
+       unsigned long sec;
+
+       /* convert whole seconds and the sub-second remainder separately to limit overflow */
+       ct = jif % HZ;
+       sec = jif / HZ;
+       if (ct) {
+               ct *= tb_ticks_per_sec;         /* remainder (< HZ jiffies) scaled to ticks * HZ */
+               do_div(ct, HZ);
+       }
+       if (sec)
+               ct += (cputime_t) sec * tb_ticks_per_sec;       /* whole seconds need no division */
+       return ct;
+}
+
+static inline u64 cputime64_to_jiffies64(const cputime_t ct)
+{
+       return mulhdu(ct, __cputime_jiffies_factor);    /* same scaling as cputime_to_jiffies(), returned as u64 */
+}
+
+/*
+ * Convert cputime <-> milliseconds
+ */
+extern u64 __cputime_msec_factor;
+
+static inline unsigned long cputime_to_msecs(const cputime_t ct)
+{
+       return mulhdu(ct, __cputime_msec_factor);       /* scale timebase ticks by the milliseconds factor */
+}
+
+static inline cputime_t msecs_to_cputime(const unsigned long ms)
+{
+       cputime_t ct;
+       unsigned long sec;
+
+       /* convert whole seconds and the sub-second remainder separately to limit overflow */
+       ct = ms % 1000;
+       sec = ms / 1000;
+       if (ct) {
+               ct *= tb_ticks_per_sec;         /* remainder (< 1000 ms) scaled to ticks * 1000 */
+               do_div(ct, 1000);
+       }
+       if (sec)
+               ct += (cputime_t) sec * tb_ticks_per_sec;       /* whole seconds need no division */
+       return ct;
+}
+
+/*
+ * Convert cputime <-> seconds
+ */
+extern u64 __cputime_sec_factor;
+
+static inline unsigned long cputime_to_secs(const cputime_t ct)
+{
+       return mulhdu(ct, __cputime_sec_factor);        /* scale timebase ticks by the seconds factor */
+}
+
+static inline cputime_t secs_to_cputime(const unsigned long sec)
+{
+       return (cputime_t) sec * tb_ticks_per_sec;      /* cast first so the multiply is done in cputime_t (u64) */
+}
+
+/*
+ * Convert cputime <-> timespec
+ */
+static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
+{
+       u64 x = ct;
+       unsigned int frac;
+
+       frac = do_div(x, tb_ticks_per_sec);     /* x becomes whole seconds, frac the leftover ticks */
+       p->tv_sec = x;
+       x = (u64) frac * 1000000000;            /* leftover ticks scaled up to ticks * ns-per-second */
+       do_div(x, tb_ticks_per_sec);
+       p->tv_nsec = x;
+}
+
+static inline cputime_t timespec_to_cputime(const struct timespec *p)
+{
+       cputime_t ct;
+
+       ct = (u64) p->tv_nsec * tb_ticks_per_sec;       /* nanosecond part, as ticks * 1e9 */
+       do_div(ct, 1000000000);
+       return ct + (u64) p->tv_sec * tb_ticks_per_sec; /* plus the whole seconds in ticks */
+}
+
+/*
+ * Convert cputime <-> timeval
+ */
+static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
+{
+       u64 x = ct;
+       unsigned int frac;
+
+       frac = do_div(x, tb_ticks_per_sec);     /* x becomes whole seconds, frac the leftover ticks */
+       p->tv_sec = x;
+       x = (u64) frac * 1000000;               /* leftover ticks scaled up to ticks * us-per-second */
+       do_div(x, tb_ticks_per_sec);
+       p->tv_usec = x;
+}
+
+static inline cputime_t timeval_to_cputime(const struct timeval *p)
+{
+       cputime_t ct;
+
+       ct = (u64) p->tv_usec * tb_ticks_per_sec;       /* microsecond part, as ticks * 1e6 */
+       do_div(ct, 1000000);
+       return ct + (u64) p->tv_sec * tb_ticks_per_sec; /* plus the whole seconds in ticks */
+}
+
+/*
+ * Convert cputime <-> clock_t (units of 1/USER_HZ seconds)
+ */
+extern u64 __cputime_clockt_factor;
+
+static inline unsigned long cputime_to_clock_t(const cputime_t ct)
+{
+       return mulhdu(ct, __cputime_clockt_factor);     /* scale timebase ticks by the clock_t factor */
+}
+
+static inline cputime_t clock_t_to_cputime(const unsigned long clk)
+{
+       cputime_t ct;
+       unsigned long sec;
+
+       /* convert whole seconds and the sub-second remainder separately to limit overflow */
+       ct = clk % USER_HZ;
+       sec = clk / USER_HZ;
+       if (ct) {
+               ct *= tb_ticks_per_sec;         /* remainder (< USER_HZ) scaled to ticks * USER_HZ */
+               do_div(ct, USER_HZ);
+       }
+       if (sec)
+               ct += (cputime_t) sec * tb_ticks_per_sec;       /* whole seconds need no division */
+       return ct;
+}
+
+#define cputime64_to_clock_t(ct)       cputime_to_clock_t((cputime_t)(ct))
+
+#endif /* __KERNEL__ */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+#endif /* __POWERPC_CPUTIME_H */
index 8eb7e857ec4cd167176c78b644ee046a6fbe09b5..51f87d9993b6d09e2833f2ff403a58783c61d7bf 100644 (file)
@@ -479,6 +479,10 @@ extern int distribute_irqs;
 struct irqaction;
 struct pt_regs;
 
+#define __ARCH_HAS_DO_SOFTIRQ
+
+extern void __do_softirq(void);
+
 #ifdef CONFIG_IRQSTACKS
 /*
  * Per-cpu stacks for handling hard and soft interrupts.
@@ -491,8 +495,6 @@ extern void call_do_softirq(struct thread_info *tp);
 extern int call___do_IRQ(int irq, struct pt_regs *regs,
                struct thread_info *tp);
 
-#define __ARCH_HAS_DO_SOFTIRQ
-
 #else
 #define irq_ctx_init()
 
index ec94b51074fcf775dbf7f7102200a7999808c61b..4465b95ebef0a07d3fade371b63cc658f83b74a2 100644 (file)
@@ -96,6 +96,11 @@ struct paca_struct {
        u64 saved_r1;                   /* r1 save for RTAS calls */
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u8 proc_enabled;                /* irq soft-enable flag */
+
+       /* Stuff for accurate time accounting */
+       u64 user_time;                  /* accumulated usermode TB ticks */
+       u64 system_time;                /* accumulated system TB ticks */
+       u64 startpurr;                  /* PURR/TB value snapshot */
 };
 
 extern struct paca_struct paca[];
index ab8688d39024763ca01cd626035330015a06fab8..dd1c0a913d5f6324712d676c827372a40e880553 100644 (file)
 
 #define SZL                    (BITS_PER_LONG/8)
 
+/*
+ * Stuff for accurate CPU time accounting.
+ * These macros handle transitions between user and system state
+ * in exception entry and exit and accumulate time to the
+ * user_time and system_time fields in the paca.
+ */
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
+#define ACCOUNT_CPU_USER_EXIT(ra, rb)
+#else
+#define ACCOUNT_CPU_USER_ENTRY(ra, rb)                                 \
+       beq     2f;                     /* skip if from kernel mode */  \
+BEGIN_FTR_SECTION;                                                     \
+       mfspr   ra,SPRN_PURR;           /* get processor util. reg */   \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                                   \
+BEGIN_FTR_SECTION;                                                     \
+       mftb    ra;                     /* or get TB if no PURR */      \
+END_FTR_SECTION_IFCLR(CPU_FTR_PURR);                                   \
+       ld      rb,PACA_STARTPURR(r13); /* previous snapshot */         \
+       std     ra,PACA_STARTPURR(r13); /* store new snapshot */        \
+       subf    rb,rb,ra;               /* subtract start value */      \
+       ld      ra,PACA_USER_TIME(r13);                                 \
+       add     ra,ra,rb;               /* add on to user time */       \
+       std     ra,PACA_USER_TIME(r13);                                 \
+2:
+
+#define ACCOUNT_CPU_USER_EXIT(ra, rb)                                  \
+BEGIN_FTR_SECTION;                                                     \
+       mfspr   ra,SPRN_PURR;           /* get processor util. reg */   \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                                   \
+BEGIN_FTR_SECTION;                                                     \
+       mftb    ra;                     /* or get TB if no PURR */      \
+END_FTR_SECTION_IFCLR(CPU_FTR_PURR);                                   \
+       ld      rb,PACA_STARTPURR(r13); /* previous snapshot */         \
+       std     ra,PACA_STARTPURR(r13); /* store new snapshot */        \
+       subf    rb,rb,ra;               /* subtract start value */      \
+       ld      ra,PACA_SYSTEM_TIME(r13);                               \
+       add     ra,ra,rb;               /* add on to system time */     \
+       std     ra,PACA_SYSTEM_TIME(r13);
+#endif
+#endif
+
 /*
  * Macros for storing registers into and loading registers from
  * exception frames.
index d9bf53653b10de8175fba20f16be1b9840a5f61a..41b7a5b3d701adca739fe90bd6e3c938398535e8 100644 (file)
@@ -424,5 +424,9 @@ static inline void create_function_call(unsigned long addr, void * func)
        create_branch(addr, func_addr, BRANCH_SET_LINK);
 }
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void account_system_vtime(struct task_struct *);
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_SYSTEM_H */
index baddc9ab57adc1e561c6805be229449ffc64582b..912118db13ae7d183a41a46b2236edc4d1bc08a2 100644 (file)
@@ -41,6 +41,7 @@ extern time_t last_rtc_update;
 
 extern void generic_calibrate_decr(void);
 extern void wakeup_decrementer(void);
+extern void snapshot_timebase(void);
 
 /* Some sane defaults: 125 MHz timebase, 1GHz processor */
 extern unsigned long ppc_proc_freq;
@@ -221,5 +222,19 @@ struct cpu_usage {
 
 DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void account_process_vtime(struct task_struct *tsk);
+#else
+#define account_process_vtime(tsk)             do { } while (0)
+#endif
+
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
+extern void calculate_steal_time(void);
+extern void snapshot_timebases(void);
+#else
+#define calculate_steal_time()                 do { } while (0)
+#define snapshot_timebases()                   do { } while (0)
+#endif
+
 #endif /* __KERNEL__ */
 #endif /* __PPC64_TIME_H */
index 321fb75b5f222f7a20ca3841fedf5b040ba823cf..c86112323c9f097e3297d7dc74a51efeae76b4cb 100644 (file)
@@ -153,5 +153,10 @@ extern __inline__ unsigned binary_tbl(void) {
 ({unsigned z; asm ("mulhwu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
 
 unsigned mulhwu_scale_factor(unsigned, unsigned);
+
+#define account_process_vtime(tsk)             do { } while (0)
+#define calculate_steal_time()                 do { } while (0)
+#define snapshot_timebases()                   do { } while (0)
+
 #endif /* __ASM_TIME_H__ */
 #endif /* __KERNEL__ */