xen: support sysenter/sysexit if hypervisor does
[sfrench/cifs-2.6.git] arch/x86/xen/smp.c
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

static cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
};

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);

static struct call_data_struct *call_data;

/*
 * Reschedule call back. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

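/*
 * Entry point for a freshly started secondary VCPU.  The hypervisor
 * drops us here with the register state set up by
 * cpu_initialize_context() below; finish per-cpu initialisation, mark
 * the CPU online and fall into the idle loop.
 */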
static __cpuinit void cpu_bringup_and_idle(void)
{
	int cpu = smp_processor_id();

	cpu_init();
	xen_enable_sysenter();

	preempt_disable();
	per_cpu(cpu_state, cpu) = CPU_ONLINE;

	xen_setup_cpu_clockevents();

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();			/* make sure everything is out */
	cpu_idle();
}

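/*
 * Bind the per-cpu IPI "vectors" (really Xen event channels) for this
 * CPU: one for reschedule requests and one for smp_call_function().
 * Returns 0 on success; on failure any IRQ already bound is torn down
 * again.
 */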
static int xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	const char *resched_name, *callfunc_name;

	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
				    cpu,
				    xen_reschedule_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    resched_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(resched_irq, cpu) = rc;

	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
				    cpu,
				    xen_call_function_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(callfunc_irq, cpu) = rc;

	return 0;

 fail:
	if (per_cpu(resched_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	if (per_cpu(callfunc_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
	return rc;
}

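/*
 * Ask the hypervisor which VCPUs this domain has and mark each one as
 * possible.  VCPUOP_is_up is expected to return an error for VCPU
 * numbers the domain doesn't have, so a non-negative return means the
 * VCPU exists (whether or not it is currently up).
 */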
void __init xen_fill_possible_map(void)
{
	int i, rc;

	for (i = 0; i < NR_CPUS; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0)
			cpu_set(i, cpu_possible_map);
	}
}

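/*
 * Boot-CPU setup: let the native code initialise CPU0, make the old
 * GDT page writable again so its memory can be recycled, and set up
 * vcpu_info placement for the pvops code.
 */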
void __init xen_smp_prepare_boot_cpu(void)
{
	int cpu;

	BUG_ON(smp_processor_id() != 0);
	native_smp_prepare_boot_cpu();

	/* We've switched to the "real" per-cpu gdt, so make sure the
	   old memory can be recycled */
	make_lowmem_page_readwrite(&per_cpu__gdt_page);

	for_each_possible_cpu(cpu) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map lives in a per cpu area that is cleared
		 * when the per cpu array is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	xen_setup_vcpu_info_placement();
}

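/*
 * Prepare the system for bringing up secondary CPUs: initialise CPU0's
 * sibling data and IPIs, trim cpu_possible_map down to max_cpus, and
 * fork an idle task for every remaining CPU.
 */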
void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;

	for_each_possible_cpu(cpu) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map will be zeroed when the per
		 * cpu area is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	smp_store_cpu_info(0);
	set_cpu_sibling_map(0);

	if (xen_smp_intr_init(0))
		BUG();

	xen_cpu_initialized_map = cpumask_of_cpu(0);

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		for (cpu = NR_CPUS - 1; !cpu_isset(cpu, cpu_possible_map); cpu--)
			continue;
		cpu_clear(cpu, cpu_possible_map);
	}

	for_each_possible_cpu(cpu) {
		struct task_struct *idle;

		if (cpu == 0)
			continue;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

		cpu_set(cpu, cpu_present_map);
	}

	//init_xenbus_allowed_cpumask();
}

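/*
 * Describe a new VCPU to the hypervisor.  Instead of a real-mode
 * trampoline, Xen takes a vcpu_guest_context holding the initial
 * register state, GDT frame and callback entry points, with eip set to
 * cpu_bringup_and_idle().  VCPUOP_initialise only loads the context;
 * the VCPU is actually started later by VCPUOP_up in xen_cpu_up().
 */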
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct gdt_page *gdt = &per_cpu(gdt_page, cpu);

	if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = 0;
	ctxt->user_regs.ss = __KERNEL_DS;
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
	make_lowmem_page_readonly(gdt->gdt);

	ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
	ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.sp0;

	ctxt->event_callback_cs     = __KERNEL_CS;
	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_cs  = __KERNEL_CS;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}

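/*
 * Bring one secondary CPU online: set up its per-cpu state, timer and
 * IPIs, hand its initial context to the hypervisor, and finally kick
 * the VCPU into life with VCPUOP_up.
 */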
int __cpuinit xen_cpu_up(unsigned int cpu)
{
	struct task_struct *idle = idle_task(cpu);
	int rc;

#if 0
	rc = cpu_up_check(cpu);
	if (rc)
		return rc;
#endif

	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	irq_ctx_init(cpu);
	xen_setup_timer(cpu);

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	smp_store_cpu_info(cpu);
	set_cpu_sibling_map(cpu);
	/* This must be done before setting cpu_online_map */
	wmb();

	cpu_set(cpu, cpu_online_map);

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	return 0;
}

void xen_smp_cpus_done(unsigned int max_cpus)
{
}

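/*
 * Shut down the calling VCPU.  Switch to the swapper page tables so we
 * aren't pinning a process's mm, then ask the hypervisor to take the
 * VCPU down; VCPUOP_down is not expected to return.
 */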
static void stop_self(void *v)
{
	int cpu = smp_processor_id();

	/* make sure we're not pinning something down */
	load_cr3(swapper_pg_dir);
	/* should set up a minimal gdt */

	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
	BUG();
}

void xen_smp_send_stop(void)
{
	smp_call_function(stop_self, NULL, 0, 0);
}

void xen_smp_send_reschedule(int cpu)
{
	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}

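/* Send the given event-channel based "IPI" to every online CPU in the mask. */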
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
	unsigned cpu;

	cpus_and(mask, mask, cpu_online_map);

	for_each_cpu_mask(cpu, mask)
		xen_send_IPI_one(cpu, vector);
}

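/*
 * IPI handler for smp_call_function().  Pick up the shared call_data,
 * acknowledge it by bumping ->started, run the function, and bump
 * ->finished if the initiator is waiting for completion.
 */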
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	irq_enter();
	(*func)(info);
	__get_cpu_var(irq_stat).irq_call_count++;
	irq_exit();

	if (wait) {
		mb();		/* commit everything before setting finished */
		atomic_inc(&call_data->finished);
	}

	return IRQ_HANDLED;
}

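/*
 * Xen implementation of smp_call_function_mask(): run func(info) on
 * each other online CPU in the mask (the caller is excluded).  Unlike
 * the native version, it yields the physical CPU to the hypervisor if
 * any target VCPU is currently scheduled out, so the wait for
 * acknowledgement doesn't spin against a preempted VCPU.
 */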
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
			       void *info, int wait)
{
	struct call_data_struct data;
	int cpus, cpu;
	bool yield;

	/* Holding any lock stops cpus from going down. */
	spin_lock(&call_lock);

	cpu_clear(smp_processor_id(), mask);

	cpus = cpus_weight(mask);
	if (!cpus) {
		spin_unlock(&call_lock);
		return 0;
	}

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();			/* write everything before IPI */

	/* Send a message to other CPUs and wait for them to respond */
	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

	/* Make sure other vcpus get a chance to run if they need to. */
	yield = false;
	for_each_cpu_mask(cpu, mask)
		if (xen_vcpu_stolen(cpu))
			yield = true;

	if (yield)
		HYPERVISOR_sched_op(SCHEDOP_yield, 0);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus ||
	       (wait && atomic_read(&data.finished) != cpus))
		cpu_relax();

	spin_unlock(&call_lock);

	return 0;
}