/******************************************************************************
 * arch/ia64/xen/time.c
 *
 * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
 *                    VA Linux Systems Japan K.K.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#include <linux/delay.h>
#include <linux/kernel_stat.h>
#include <linux/posix-timers.h>
#include <linux/irq.h>
#include <linux/clocksource.h>

#include <asm/timex.h>

#include <asm/xen/hypervisor.h>

#include <xen/interface/vcpu.h>

#include "../kernel/fsyscall_gtod_data.h"

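/*
 * Per-CPU steal-time bookkeeping: the runstate area shared with the
 * hypervisor, plus the amounts of blocked and stolen time (in nanoseconds)
 * that have already been fed into the kernel's accounting.
 */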
DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
DEFINE_PER_CPU(unsigned long, processed_stolen_time);
DEFINE_PER_CPU(unsigned long, processed_blocked_time);

/* taken from i386/kernel/time-xen.c */
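/*
 * Register this CPU's runstate area with the hypervisor and snapshot the
 * current blocked/stolen totals, so that only time accumulated from now on
 * is accounted by consider_steal_time().
 */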
static void xen_init_missing_ticks_accounting(int cpu)
{
	struct vcpu_register_runstate_memory_area area;
	struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
	int rc;

	memset(runstate, 0, sizeof(*runstate));

	area.addr.v = runstate;
	rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu,
				&area);
	WARN_ON(rc && rc != -ENOSYS);

	per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked];
	per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable]
					    + runstate->time[RUNSTATE_offline];
}

/*
 * Runstate accounting
 */
/* stolen from arch/x86/xen/time.c */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
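	/* Re-read the snapshot if the hypervisor updated it mid-copy. */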
	do {
		state_time = state->state_entry_time;
		rmb();
		*res = *state;
		rmb();
	} while (state->state_entry_time != state_time);
}

#define NS_PER_TICK (1000000000LL/HZ)

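/*
 * Convert the blocked/stolen time reported in the runstate area into
 * timer ticks, feed them into the scheduler and time accounting, and
 * return the amount (in ITC units) by which the next timer match
 * should be pushed forward.
 */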
static unsigned long
consider_steal_time(unsigned long new_itm)
{
	unsigned long stolen, blocked;
	unsigned long delta_itm = 0, stolentick = 0;
	int cpu = smp_processor_id();
	struct vcpu_runstate_info runstate;
	struct task_struct *p = current;

	get_runstate_snapshot(&runstate);

	/*
	 * Blocked and stolen time accumulated since the last accounting
	 * pass, in nanoseconds.
	 */
	blocked = runstate.time[RUNSTATE_blocked] -
		  per_cpu(processed_blocked_time, cpu);
	stolen = runstate.time[RUNSTATE_runnable] +
		 runstate.time[RUNSTATE_offline] -
		 per_cpu(processed_stolen_time, cpu);

	/*
	 * Check for the effect of vcpu migration: in that case the itc
	 * value goes backwards, which would produce a huge bogus stolen
	 * value, so reject such samples here.
	 */
	if (!time_after_eq(runstate.time[RUNSTATE_blocked],
			   per_cpu(processed_blocked_time, cpu)))
		blocked = 0;

	if (!time_after_eq(runstate.time[RUNSTATE_runnable] +
			   runstate.time[RUNSTATE_offline],
			   per_cpu(processed_stolen_time, cpu)))
		stolen = 0;

	if (!time_after(delta_itm + new_itm, ia64_get_itc()))
		stolentick = ia64_get_itc() - new_itm;

	do_div(stolentick, NS_PER_TICK);
	stolentick++;

	do_div(stolen, NS_PER_TICK);

	if (stolen > stolentick)
		stolen = stolentick;

	stolentick -= stolen;
	do_div(blocked, NS_PER_TICK);

	if (blocked > stolentick)
		blocked = stolentick;

	if (stolen > 0 || blocked > 0) {
		account_steal_ticks(stolen);
		account_idle_ticks(blocked);
		run_local_timers();

		if (rcu_pending(cpu))
			rcu_check_callbacks(cpu, user_mode(get_irq_regs()));

		scheduler_tick();
		run_posix_cpu_timers(p);
		delta_itm += local_cpu_data->itm_delta * (stolen + blocked);

		if (cpu == time_keeper_id) {
			write_seqlock(&xtime_lock);
			do_timer(stolen + blocked);
			local_cpu_data->itm_next = delta_itm + new_itm;
			write_sequnlock(&xtime_lock);
		} else {
			local_cpu_data->itm_next = delta_itm + new_itm;
		}
		per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen;
		per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked;
	}
	return delta_itm;
}

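/*
 * .do_steal_accounting hook: account any stolen/blocked time and advance
 * *new_itm accordingly.  Returns 1 when time was accounted and the adjusted
 * timer match is still in the future.
 */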
static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long delta_itm;
	delta_itm = consider_steal_time(*new_itm);
	*new_itm += delta_itm;
	if (time_after(*new_itm, ia64_get_itc()) && delta_itm)
		return 1;

	return 0;
}

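/*
 * .clocksource_resume hook: clear itc_jitter_data.itc_lastcycle with a
 * cmpxchg loop so a stale last-cycle value is not carried across a
 * suspend/resume cycle.
 */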
static void xen_itc_jitter_data_reset(void)
{
	u64 lcycle, ret;

	do {
		lcycle = itc_jitter_data.itc_lastcycle;
		ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0);
	} while (unlikely(ret != lcycle));
}

/* based on xen_sched_clock() in arch/x86/xen/time.c. */
/*
 * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If that cannot be defined,
 * similar logic should be implemented here.
 */
/*
 * Xen sched_clock implementation.  Returns the number of unstolen
 * nanoseconds, i.e. the nanoseconds the VCPU has spent in the RUNNING
 * and BLOCKED states.
 */
static unsigned long long xen_sched_clock(void)
{
	struct vcpu_runstate_info runstate;

	unsigned long long now;
	unsigned long long offset;
	unsigned long long ret;

	/*
	 * Ideally sched_clock should be called on a per-cpu basis
	 * anyway, so preempt should already be disabled, but that's
	 * not current practice at the moment.
	 */
	preempt_disable();

	/*
	 * Both ia64_native_sched_clock() and Xen's runstate are based
	 * on AR.ITC, so the difference between them is meaningful.
	 */
	now = ia64_native_sched_clock();

	get_runstate_snapshot(&runstate);

	WARN_ON(runstate.state != RUNSTATE_running);

	offset = 0;
	if (now > runstate.state_entry_time)
		offset = now - runstate.state_entry_time;
	ret = runstate.time[RUNSTATE_blocked] +
		runstate.time[RUNSTATE_running] +
		offset;

	preempt_enable();

	return ret;
}

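/*
 * Paravirtualized time operations, installed in place of the native ia64
 * pv_time_ops by the Xen paravirt setup code when running on Xen.
 */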
struct pv_time_ops xen_time_ops __initdata = {
	.init_missing_ticks_accounting	= xen_init_missing_ticks_accounting,
	.do_steal_accounting		= xen_do_steal_accounting,
	.clocksource_resume		= xen_itc_jitter_data_reset,
	.sched_clock			= xen_sched_clock,
};

/* Called after suspend, to resume time.  */
static void xen_local_tick_resume(void)
{
	/* Just trigger a tick.  */
	ia64_cpu_local_tick();
	touch_softlockup_watchdog();
}

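/*
 * Resume timekeeping after the domain has been restored: restart the local
 * tick and re-register the runstate areas of all online CPUs.
 */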
void
xen_timer_resume(void)
{
	unsigned int cpu;

	xen_local_tick_resume();

	for_each_online_cpu(cpu)
		xen_init_missing_ticks_accounting(cpu);
}

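/*
 * Per-CPU helper for xen_timer_resume_on_aps(): restart the tick and
 * re-register the runstate area on each application processor.
 */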
static void ia64_cpu_local_tick_fn(void *unused)
{
	xen_local_tick_resume();
	xen_init_missing_ticks_accounting(smp_processor_id());
}

void
xen_timer_resume_on_aps(void)
{
	smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1);
}