drivers/xen/events/events_base.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xen event channels
4  *
5  * Xen models interrupts with abstract event channels.  Because each
6  * domain gets 1024 event channels, but NR_IRQS is not that large, we
7  * must dynamically map irqs<->event channels.  The event channels
8  * interface with the rest of the kernel by defining a xen interrupt
9  * chip.  When an event is received, it is mapped to an irq and sent
10  * through the normal interrupt processing path.
11  *
12  * There are four kinds of events which can be mapped to an event
13  * channel:
14  *
15  * 1. Inter-domain notifications.  This includes all the virtual
16  *    device events, since they're driven by front-ends in another domain
17  *    (typically dom0).
18  * 2. VIRQs, typically used for timers.  These are per-cpu events.
19  * 3. IPIs.
20  * 4. PIRQs - Hardware interrupts.
21  *
22  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23  */
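/*
 * Sketch of how the four kinds listed above map onto the binding API
 * provided further down in this file (illustration only, not exhaustive):
 *
 *	bind_evtchn_to_irqhandler()  - inter-domain event channels
 *	bind_virq_to_irqhandler()    - VIRQs, e.g. VIRQ_TIMER (per-cpu)
 *	bind_ipi_to_irqhandler()     - IPIs between vCPUs
 *	xen_bind_pirq_gsi_to_irq()   - PIRQs (hardware interrupts)
 *
 * Each returns a Linux irq wired to one of the irq_chips defined below.
 */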
24
25 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27 #include <linux/linkage.h>
28 #include <linux/interrupt.h>
29 #include <linux/irq.h>
30 #include <linux/moduleparam.h>
31 #include <linux/string.h>
32 #include <linux/memblock.h>
33 #include <linux/slab.h>
34 #include <linux/irqnr.h>
35 #include <linux/pci.h>
36 #include <linux/spinlock.h>
37 #include <linux/cpuhotplug.h>
38 #include <linux/atomic.h>
39 #include <linux/ktime.h>
40
41 #ifdef CONFIG_X86
42 #include <asm/desc.h>
43 #include <asm/ptrace.h>
44 #include <asm/idtentry.h>
45 #include <asm/irq.h>
46 #include <asm/io_apic.h>
47 #include <asm/i8259.h>
48 #include <asm/xen/pci.h>
49 #endif
50 #include <asm/sync_bitops.h>
51 #include <asm/xen/hypercall.h>
52 #include <asm/xen/hypervisor.h>
53 #include <xen/page.h>
54
55 #include <xen/xen.h>
56 #include <xen/hvm.h>
57 #include <xen/xen-ops.h>
58 #include <xen/events.h>
59 #include <xen/interface/xen.h>
60 #include <xen/interface/event_channel.h>
61 #include <xen/interface/hvm/hvm_op.h>
62 #include <xen/interface/hvm/params.h>
63 #include <xen/interface/physdev.h>
64 #include <xen/interface/sched.h>
65 #include <xen/interface/vcpu.h>
66 #include <asm/hw_irq.h>
67
68 #include "events_internal.h"
69
70 #undef MODULE_PARAM_PREFIX
71 #define MODULE_PARAM_PREFIX "xen."
72
73 /* Interrupt types. */
74 enum xen_irq_type {
75         IRQT_UNBOUND = 0,
76         IRQT_PIRQ,
77         IRQT_VIRQ,
78         IRQT_IPI,
79         IRQT_EVTCHN
80 };
81
82 /*
83  * Packed IRQ information:
84  * type - enum xen_irq_type
85  * event channel - irq->event channel mapping
86  * cpu - cpu this event channel is bound to
87  * index - type-specific information:
88  *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
89  *           guest, or GSI (real passthrough IRQ) of the device.
90  *    VIRQ - virq number
91  *    IPI - IPI vector
92  *    EVTCHN -
93  */
94 struct irq_info {
95         struct list_head list;
96         struct list_head eoi_list;
97         short refcnt;
98         u8 spurious_cnt;
99         u8 is_accounted;
100         enum xen_irq_type type; /* type */
101         unsigned irq;
102         evtchn_port_t evtchn;   /* event channel */
103         unsigned short cpu;     /* cpu bound */
104         unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
105         unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
106         u64 eoi_time;           /* Time in jiffies when to EOI. */
107
108         union {
109                 unsigned short virq;
110                 enum ipi_vector ipi;
111                 struct {
112                         unsigned short pirq;
113                         unsigned short gsi;
114                         unsigned char vector;
115                         unsigned char flags;
116                         uint16_t domid;
117                 } pirq;
118         } u;
119 };
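/*
 * Sketch (illustration only): the union above is interpreted according to
 * the type member, e.g. a hypothetical pretty-printer would do:
 *
 *	switch (info->type) {
 *	case IRQT_VIRQ:
 *		pr_debug("virq %u\n", info->u.virq);
 *		break;
 *	case IRQT_IPI:
 *		pr_debug("ipi vector %d\n", info->u.ipi);
 *		break;
 *	case IRQT_PIRQ:
 *		pr_debug("pirq %u gsi %u domid %u flags %#x\n",
 *			 info->u.pirq.pirq, info->u.pirq.gsi,
 *			 info->u.pirq.domid, info->u.pirq.flags);
 *		break;
 *	default:
 *		break;
 *	}
 */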
120
121 #define PIRQ_NEEDS_EOI  (1 << 0)
122 #define PIRQ_SHAREABLE  (1 << 1)
123 #define PIRQ_MSI_GROUP  (1 << 2)
124
125 static uint __read_mostly event_loop_timeout = 2;
126 module_param(event_loop_timeout, uint, 0644);
127
128 static uint __read_mostly event_eoi_delay = 10;
129 module_param(event_eoi_delay, uint, 0644);
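/*
 * With MODULE_PARAM_PREFIX "xen." (defined above) both knobs can be set on
 * the kernel command line, e.g. (illustrative values):
 *
 *	xen.event_loop_timeout=4 xen.event_eoi_delay=20
 *
 * event_loop_timeout bounds, in jiffies, how long the event handling loop
 * may run before further EOIs are deferred; event_eoi_delay is the delay,
 * in jiffies, applied to those deferred EOIs.
 */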
130
131 const struct evtchn_ops *evtchn_ops;
132
133 /*
134  * This lock protects updates to the following mapping and reference-count
135  * arrays. The lock does not need to be acquired to read the mapping tables.
136  */
137 static DEFINE_MUTEX(irq_mapping_update_lock);
138
139 /*
140  * Lock protecting event handling loop against removing event channels.
141  * Adding of event channels is no issue as the associated IRQ becomes active
142  * only after everything is set up (before request_[threaded_]irq() the handler
143  * can't be entered for an event, as the event channel will be unmasked only
144  * then).
145  */
146 static DEFINE_RWLOCK(evtchn_rwlock);
147
148 /*
149  * Lock hierarchy:
150  *
151  * irq_mapping_update_lock
152  *   evtchn_rwlock
153  *     IRQ-desc lock
154  *       percpu eoi_list_lock
155  */
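/*
 * Sketch of the ordering above: a path needing both outer locks nests them
 * as
 *
 *	mutex_lock(&irq_mapping_update_lock);
 *	write_lock_irqsave(&evtchn_rwlock, flags);
 *	...
 *	write_unlock_irqrestore(&evtchn_rwlock, flags);
 *	mutex_unlock(&irq_mapping_update_lock);
 *
 * (as happens when an irq is freed under the mapping lock), never the
 * other way round.
 */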
156
157 static LIST_HEAD(xen_irq_list_head);
158
159 /* IRQ <-> VIRQ mapping. */
160 static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
161
162 /* IRQ <-> IPI mapping */
163 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
164
165 /* Event channel distribution data */
166 static atomic_t channels_on_cpu[NR_CPUS];
167
168 static int **evtchn_to_irq;
169 #ifdef CONFIG_X86
170 static unsigned long *pirq_eoi_map;
171 #endif
172 static bool (*pirq_needs_eoi)(unsigned irq);
173
174 #define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
175 #define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
176 #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
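/*
 * Worked example (assuming 4 KiB pages and 4-byte int): EVTCHN_PER_ROW is
 * 4096 / 4 = 1024, so event channel 1300 lives in row 1300 / 1024 = 1,
 * column 1300 % 1024 = 276 of the two-level evtchn_to_irq table. Rows are
 * allocated lazily in set_evtchn_to_irq() once a port in their range is
 * actually used.
 */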
177
178 /* Xen will never allocate port zero for any purpose. */
179 #define VALID_EVTCHN(chn)       ((chn) != 0)
180
181 static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
182
183 static struct irq_chip xen_dynamic_chip;
184 static struct irq_chip xen_lateeoi_chip;
185 static struct irq_chip xen_percpu_chip;
186 static struct irq_chip xen_pirq_chip;
187 static void enable_dynirq(struct irq_data *data);
188 static void disable_dynirq(struct irq_data *data);
189
190 static DEFINE_PER_CPU(unsigned int, irq_epoch);
191
192 static void clear_evtchn_to_irq_row(unsigned row)
193 {
194         unsigned col;
195
196         for (col = 0; col < EVTCHN_PER_ROW; col++)
197                 WRITE_ONCE(evtchn_to_irq[row][col], -1);
198 }
199
200 static void clear_evtchn_to_irq_all(void)
201 {
202         unsigned row;
203
204         for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
205                 if (evtchn_to_irq[row] == NULL)
206                         continue;
207                 clear_evtchn_to_irq_row(row);
208         }
209 }
210
211 static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
212 {
213         unsigned row;
214         unsigned col;
215
216         if (evtchn >= xen_evtchn_max_channels())
217                 return -EINVAL;
218
219         row = EVTCHN_ROW(evtchn);
220         col = EVTCHN_COL(evtchn);
221
222         if (evtchn_to_irq[row] == NULL) {
223                 /* Unallocated irq entries return -1 anyway */
224                 if (irq == -1)
225                         return 0;
226
227                 evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
228                 if (evtchn_to_irq[row] == NULL)
229                         return -ENOMEM;
230
231                 clear_evtchn_to_irq_row(row);
232         }
233
234         WRITE_ONCE(evtchn_to_irq[row][col], irq);
235         return 0;
236 }
237
238 int get_evtchn_to_irq(evtchn_port_t evtchn)
239 {
240         if (evtchn >= xen_evtchn_max_channels())
241                 return -1;
242         if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
243                 return -1;
244         return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
245 }
246
247 /* Get info for IRQ */
248 static struct irq_info *info_for_irq(unsigned irq)
249 {
250         if (irq < nr_legacy_irqs())
251                 return legacy_info_ptrs[irq];
252         else
253                 return irq_get_chip_data(irq);
254 }
255
256 static void set_info_for_irq(unsigned int irq, struct irq_info *info)
257 {
258         if (irq < nr_legacy_irqs())
259                 legacy_info_ptrs[irq] = info;
260         else
261                 irq_set_chip_data(irq, info);
262 }
263
264 /* Per CPU channel accounting */
265 static void channels_on_cpu_dec(struct irq_info *info)
266 {
267         if (!info->is_accounted)
268                 return;
269
270         info->is_accounted = 0;
271
272         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
273                 return;
274
275         WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
276 }
277
278 static void channels_on_cpu_inc(struct irq_info *info)
279 {
280         if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
281                 return;
282
283         if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
284                                             INT_MAX)))
285                 return;
286
287         info->is_accounted = 1;
288 }
289
290 /* Constructors for packed IRQ information. */
291 static int xen_irq_info_common_setup(struct irq_info *info,
292                                      unsigned irq,
293                                      enum xen_irq_type type,
294                                      evtchn_port_t evtchn,
295                                      unsigned short cpu)
296 {
297         int ret;
298
299         BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
300
301         info->type = type;
302         info->irq = irq;
303         info->evtchn = evtchn;
304         info->cpu = cpu;
305
306         ret = set_evtchn_to_irq(evtchn, irq);
307         if (ret < 0)
308                 return ret;
309
310         irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
311
312         return xen_evtchn_port_setup(evtchn);
313 }
314
315 static int xen_irq_info_evtchn_setup(unsigned irq,
316                                      evtchn_port_t evtchn)
317 {
318         struct irq_info *info = info_for_irq(irq);
319
320         return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
321 }
322
323 static int xen_irq_info_ipi_setup(unsigned cpu,
324                                   unsigned irq,
325                                   evtchn_port_t evtchn,
326                                   enum ipi_vector ipi)
327 {
328         struct irq_info *info = info_for_irq(irq);
329
330         info->u.ipi = ipi;
331
332         per_cpu(ipi_to_irq, cpu)[ipi] = irq;
333
334         return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
335 }
336
337 static int xen_irq_info_virq_setup(unsigned cpu,
338                                    unsigned irq,
339                                    evtchn_port_t evtchn,
340                                    unsigned virq)
341 {
342         struct irq_info *info = info_for_irq(irq);
343
344         info->u.virq = virq;
345
346         per_cpu(virq_to_irq, cpu)[virq] = irq;
347
348         return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
349 }
350
351 static int xen_irq_info_pirq_setup(unsigned irq,
352                                    evtchn_port_t evtchn,
353                                    unsigned pirq,
354                                    unsigned gsi,
355                                    uint16_t domid,
356                                    unsigned char flags)
357 {
358         struct irq_info *info = info_for_irq(irq);
359
360         info->u.pirq.pirq = pirq;
361         info->u.pirq.gsi = gsi;
362         info->u.pirq.domid = domid;
363         info->u.pirq.flags = flags;
364
365         return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
366 }
367
368 static void xen_irq_info_cleanup(struct irq_info *info)
369 {
370         set_evtchn_to_irq(info->evtchn, -1);
371         info->evtchn = 0;
372         channels_on_cpu_dec(info);
373 }
374
375 /*
376  * Accessors for packed IRQ information.
377  */
378 evtchn_port_t evtchn_from_irq(unsigned irq)
379 {
380         const struct irq_info *info = NULL;
381
382         if (likely(irq < nr_irqs))
383                 info = info_for_irq(irq);
384         if (!info)
385                 return 0;
386
387         return info->evtchn;
388 }
389
390 unsigned int irq_from_evtchn(evtchn_port_t evtchn)
391 {
392         return get_evtchn_to_irq(evtchn);
393 }
394 EXPORT_SYMBOL_GPL(irq_from_evtchn);
395
396 int irq_from_virq(unsigned int cpu, unsigned int virq)
397 {
398         return per_cpu(virq_to_irq, cpu)[virq];
399 }
400
401 static enum ipi_vector ipi_from_irq(unsigned irq)
402 {
403         struct irq_info *info = info_for_irq(irq);
404
405         BUG_ON(info == NULL);
406         BUG_ON(info->type != IRQT_IPI);
407
408         return info->u.ipi;
409 }
410
411 static unsigned virq_from_irq(unsigned irq)
412 {
413         struct irq_info *info = info_for_irq(irq);
414
415         BUG_ON(info == NULL);
416         BUG_ON(info->type != IRQT_VIRQ);
417
418         return info->u.virq;
419 }
420
421 static unsigned pirq_from_irq(unsigned irq)
422 {
423         struct irq_info *info = info_for_irq(irq);
424
425         BUG_ON(info == NULL);
426         BUG_ON(info->type != IRQT_PIRQ);
427
428         return info->u.pirq.pirq;
429 }
430
431 static enum xen_irq_type type_from_irq(unsigned irq)
432 {
433         return info_for_irq(irq)->type;
434 }
435
436 static unsigned cpu_from_irq(unsigned irq)
437 {
438         return info_for_irq(irq)->cpu;
439 }
440
441 unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
442 {
443         int irq = get_evtchn_to_irq(evtchn);
444         unsigned ret = 0;
445
446         if (irq != -1)
447                 ret = cpu_from_irq(irq);
448
449         return ret;
450 }
451
452 #ifdef CONFIG_X86
453 static bool pirq_check_eoi_map(unsigned irq)
454 {
455         return test_bit(pirq_from_irq(irq), pirq_eoi_map);
456 }
457 #endif
458
459 static bool pirq_needs_eoi_flag(unsigned irq)
460 {
461         struct irq_info *info = info_for_irq(irq);
462         BUG_ON(info->type != IRQT_PIRQ);
463
464         return info->u.pirq.flags & PIRQ_NEEDS_EOI;
465 }
466
467 static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
468                                bool force_affinity)
469 {
470         int irq = get_evtchn_to_irq(evtchn);
471         struct irq_info *info = info_for_irq(irq);
472
473         BUG_ON(irq == -1);
474
475         if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
476                 cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
477                 cpumask_copy(irq_get_effective_affinity_mask(irq),
478                              cpumask_of(cpu));
479         }
480
481         xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
482
483         channels_on_cpu_dec(info);
484         info->cpu = cpu;
485         channels_on_cpu_inc(info);
486 }
487
488 /**
489  * notify_remote_via_irq - send event to remote end of event channel via irq
490  * @irq: irq of event channel to send event to
491  *
492  * Unlike notify_remote_via_evtchn(), this is safe to use across
493  * save/restore. Notifications on a broken connection are silently
494  * dropped.
495  */
496 void notify_remote_via_irq(int irq)
497 {
498         evtchn_port_t evtchn = evtchn_from_irq(irq);
499
500         if (VALID_EVTCHN(evtchn))
501                 notify_remote_via_evtchn(evtchn);
502 }
503 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
504
505 struct lateeoi_work {
506         struct delayed_work delayed;
507         spinlock_t eoi_list_lock;
508         struct list_head eoi_list;
509 };
510
511 static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
512
513 static void lateeoi_list_del(struct irq_info *info)
514 {
515         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
516         unsigned long flags;
517
518         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
519         list_del_init(&info->eoi_list);
520         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
521 }
522
523 static void lateeoi_list_add(struct irq_info *info)
524 {
525         struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
526         struct irq_info *elem;
527         u64 now = get_jiffies_64();
528         unsigned long delay;
529         unsigned long flags;
530
531         if (now < info->eoi_time)
532                 delay = info->eoi_time - now;
533         else
534                 delay = 1;
535
536         spin_lock_irqsave(&eoi->eoi_list_lock, flags);
537
538         if (list_empty(&eoi->eoi_list)) {
539                 list_add(&info->eoi_list, &eoi->eoi_list);
540                 mod_delayed_work_on(info->eoi_cpu, system_wq,
541                                     &eoi->delayed, delay);
542         } else {
543                 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
544                         if (elem->eoi_time <= info->eoi_time)
545                                 break;
546                 }
547                 list_add(&info->eoi_list, &elem->eoi_list);
548         }
549
550         spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
551 }
552
553 static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
554 {
555         evtchn_port_t evtchn;
556         unsigned int cpu;
557         unsigned int delay = 0;
558
559         evtchn = info->evtchn;
560         if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
561                 return;
562
563         if (spurious) {
564                 if ((1 << info->spurious_cnt) < (HZ << 2)) {
565                         if (info->spurious_cnt != 0xFF)
566                                 info->spurious_cnt++;
567                 }
568                 if (info->spurious_cnt > 1) {
569                         delay = 1 << (info->spurious_cnt - 2);
570                         if (delay > HZ)
571                                 delay = HZ;
572                         if (!info->eoi_time)
573                                 info->eoi_cpu = smp_processor_id();
574                         info->eoi_time = get_jiffies_64() + delay;
575                 }
576         } else {
577                 info->spurious_cnt = 0;
578         }
579
580         cpu = info->eoi_cpu;
581         if (info->eoi_time &&
582             (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
583                 lateeoi_list_add(info);
584                 return;
585         }
586
587         info->eoi_time = 0;
588         unmask_evtchn(evtchn);
589 }
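/*
 * Spurious-event backoff above, by example: the first spurious event only
 * bumps spurious_cnt; from the second one onwards the unmask is delayed by
 * 1 << (spurious_cnt - 2) jiffies, i.e. 1, 2, 4, ... jiffies, capped at HZ.
 * spurious_cnt stops growing once 1 << spurious_cnt reaches 4 * HZ (and
 * never exceeds 0xFF); the first non-spurious event resets it to 0.
 */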
590
591 static void xen_irq_lateeoi_worker(struct work_struct *work)
592 {
593         struct lateeoi_work *eoi;
594         struct irq_info *info;
595         u64 now = get_jiffies_64();
596         unsigned long flags;
597
598         eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
599
600         read_lock_irqsave(&evtchn_rwlock, flags);
601
602         while (true) {
603                 spin_lock(&eoi->eoi_list_lock);
604
605                 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
606                                                 eoi_list);
607
608                 if (info == NULL || now < info->eoi_time) {
609                         spin_unlock(&eoi->eoi_list_lock);
610                         break;
611                 }
612
613                 list_del_init(&info->eoi_list);
614
615                 spin_unlock(&eoi->eoi_list_lock);
616
617                 info->eoi_time = 0;
618
619                 xen_irq_lateeoi_locked(info, false);
620         }
621
622         if (info)
623                 mod_delayed_work_on(info->eoi_cpu, system_wq,
624                                     &eoi->delayed, info->eoi_time - now);
625
626         read_unlock_irqrestore(&evtchn_rwlock, flags);
627 }
628
629 static void xen_cpu_init_eoi(unsigned int cpu)
630 {
631         struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
632
633         INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
634         spin_lock_init(&eoi->eoi_list_lock);
635         INIT_LIST_HEAD(&eoi->eoi_list);
636 }
637
638 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
639 {
640         struct irq_info *info;
641         unsigned long flags;
642
643         read_lock_irqsave(&evtchn_rwlock, flags);
644
645         info = info_for_irq(irq);
646
647         if (info)
648                 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
649
650         read_unlock_irqrestore(&evtchn_rwlock, flags);
651 }
652 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
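/*
 * Typical lateeoi usage (sketch, "myback" names invented): a backend binds
 * with the lateeoi variant and signals completion of event processing
 * explicitly, which is what unmasks the event channel again:
 *
 *	static irqreturn_t myback_interrupt(int irq, void *dev_id)
 *	{
 *		bool did_work = process_requests(dev_id);	// hypothetical
 *
 *		xen_irq_lateeoi(irq, did_work ? 0 : XEN_EOI_FLAG_SPURIOUS);
 *		return IRQ_HANDLED;
 *	}
 *
 *	irq = bind_interdomain_evtchn_to_irqhandler_lateeoi(domid, port,
 *			myback_interrupt, 0, "myback", dev);
 */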
653
654 static void xen_irq_init(unsigned irq)
655 {
656         struct irq_info *info;
657
658         info = kzalloc(sizeof(*info), GFP_KERNEL);
659         if (info == NULL)
660                 panic("Unable to allocate metadata for IRQ%d\n", irq);
661
662         info->type = IRQT_UNBOUND;
663         info->refcnt = -1;
664
665         set_info_for_irq(irq, info);
666         /*
667          * Interrupt affinity setting can be immediate. No point
668          * in delaying it until an interrupt is handled.
669          */
670         irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
671
672         INIT_LIST_HEAD(&info->eoi_list);
673         list_add_tail(&info->list, &xen_irq_list_head);
674 }
675
676 static int __must_check xen_allocate_irqs_dynamic(int nvec)
677 {
678         int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
679
680         if (irq >= 0) {
681                 for (i = 0; i < nvec; i++)
682                         xen_irq_init(irq + i);
683         }
684
685         return irq;
686 }
687
688 static inline int __must_check xen_allocate_irq_dynamic(void)
689 {
690
691         return xen_allocate_irqs_dynamic(1);
692 }
693
694 static int __must_check xen_allocate_irq_gsi(unsigned gsi)
695 {
696         int irq;
697
698         /*
699          * A PV guest has no concept of a GSI (since it has no ACPI
700          * nor access to/knowledge of the physical APICs). Therefore
701          * all IRQs are dynamically allocated from the entire IRQ
702          * space.
703          */
704         if (xen_pv_domain() && !xen_initial_domain())
705                 return xen_allocate_irq_dynamic();
706
707         /* Legacy IRQ descriptors are already allocated by the arch. */
708         if (gsi < nr_legacy_irqs())
709                 irq = gsi;
710         else
711                 irq = irq_alloc_desc_at(gsi, -1);
712
713         xen_irq_init(irq);
714
715         return irq;
716 }
717
718 static void xen_free_irq(unsigned irq)
719 {
720         struct irq_info *info = info_for_irq(irq);
721         unsigned long flags;
722
723         if (WARN_ON(!info))
724                 return;
725
726         write_lock_irqsave(&evtchn_rwlock, flags);
727
728         if (!list_empty(&info->eoi_list))
729                 lateeoi_list_del(info);
730
731         list_del(&info->list);
732
733         set_info_for_irq(irq, NULL);
734
735         WARN_ON(info->refcnt > 0);
736
737         write_unlock_irqrestore(&evtchn_rwlock, flags);
738
739         kfree(info);
740
741         /* Legacy IRQ descriptors are managed by the arch. */
742         if (irq < nr_legacy_irqs())
743                 return;
744
745         irq_free_desc(irq);
746 }
747
748 static void xen_evtchn_close(evtchn_port_t port)
749 {
750         struct evtchn_close close;
751
752         close.port = port;
753         if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
754                 BUG();
755 }
756
757 static void pirq_query_unmask(int irq)
758 {
759         struct physdev_irq_status_query irq_status;
760         struct irq_info *info = info_for_irq(irq);
761
762         BUG_ON(info->type != IRQT_PIRQ);
763
764         irq_status.irq = pirq_from_irq(irq);
765         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
766                 irq_status.flags = 0;
767
768         info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
769         if (irq_status.flags & XENIRQSTAT_needs_eoi)
770                 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
771 }
772
773 static void eoi_pirq(struct irq_data *data)
774 {
775         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
776         struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
777         int rc = 0;
778
779         if (!VALID_EVTCHN(evtchn))
780                 return;
781
782         clear_evtchn(evtchn);
783
784         if (pirq_needs_eoi(data->irq)) {
785                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
786                 WARN_ON(rc);
787         }
788 }
789
790 static void mask_ack_pirq(struct irq_data *data)
791 {
792         disable_dynirq(data);
793         eoi_pirq(data);
794 }
795
796 static unsigned int __startup_pirq(unsigned int irq)
797 {
798         struct evtchn_bind_pirq bind_pirq;
799         struct irq_info *info = info_for_irq(irq);
800         evtchn_port_t evtchn = evtchn_from_irq(irq);
801         int rc;
802
803         BUG_ON(info->type != IRQT_PIRQ);
804
805         if (VALID_EVTCHN(evtchn))
806                 goto out;
807
808         bind_pirq.pirq = pirq_from_irq(irq);
809         /* NB. We are happy to share unless we are probing. */
810         bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
811                                         BIND_PIRQ__WILL_SHARE : 0;
812         rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
813         if (rc != 0) {
814                 pr_warn("Failed to obtain physical IRQ %d\n", irq);
815                 return 0;
816         }
817         evtchn = bind_pirq.port;
818
819         pirq_query_unmask(irq);
820
821         rc = set_evtchn_to_irq(evtchn, irq);
822         if (rc)
823                 goto err;
824
825         info->evtchn = evtchn;
826         bind_evtchn_to_cpu(evtchn, 0, false);
827
828         rc = xen_evtchn_port_setup(evtchn);
829         if (rc)
830                 goto err;
831
832 out:
833         unmask_evtchn(evtchn);
834         eoi_pirq(irq_get_irq_data(irq));
835
836         return 0;
837
838 err:
839         pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
840         xen_evtchn_close(evtchn);
841         return 0;
842 }
843
844 static unsigned int startup_pirq(struct irq_data *data)
845 {
846         return __startup_pirq(data->irq);
847 }
848
849 static void shutdown_pirq(struct irq_data *data)
850 {
851         unsigned int irq = data->irq;
852         struct irq_info *info = info_for_irq(irq);
853         evtchn_port_t evtchn = evtchn_from_irq(irq);
854
855         BUG_ON(info->type != IRQT_PIRQ);
856
857         if (!VALID_EVTCHN(evtchn))
858                 return;
859
860         mask_evtchn(evtchn);
861         xen_evtchn_close(evtchn);
862         xen_irq_info_cleanup(info);
863 }
864
865 static void enable_pirq(struct irq_data *data)
866 {
867         enable_dynirq(data);
868 }
869
870 static void disable_pirq(struct irq_data *data)
871 {
872         disable_dynirq(data);
873 }
874
875 int xen_irq_from_gsi(unsigned gsi)
876 {
877         struct irq_info *info;
878
879         list_for_each_entry(info, &xen_irq_list_head, list) {
880                 if (info->type != IRQT_PIRQ)
881                         continue;
882
883                 if (info->u.pirq.gsi == gsi)
884                         return info->irq;
885         }
886
887         return -1;
888 }
889 EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
890
891 static void __unbind_from_irq(unsigned int irq)
892 {
893         evtchn_port_t evtchn = evtchn_from_irq(irq);
894         struct irq_info *info = info_for_irq(irq);
895
896         if (info->refcnt > 0) {
897                 info->refcnt--;
898                 if (info->refcnt != 0)
899                         return;
900         }
901
902         if (VALID_EVTCHN(evtchn)) {
903                 unsigned int cpu = cpu_from_irq(irq);
904
905                 xen_evtchn_close(evtchn);
906
907                 switch (type_from_irq(irq)) {
908                 case IRQT_VIRQ:
909                         per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
910                         break;
911                 case IRQT_IPI:
912                         per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
913                         break;
914                 default:
915                         break;
916                 }
917
918                 xen_irq_info_cleanup(info);
919         }
920
921         xen_free_irq(irq);
922 }
923
924 /*
925  * Do not make any assumptions regarding the relationship between the
926  * IRQ number returned here and the Xen pirq argument.
927  *
928  * Note: We don't assign an event channel until the irq has actually been started
929  * up.  Return an existing irq if we've already got one for the gsi.
930  *
931  * Shareable implies level triggered, not shareable implies edge
932  * triggered here.
933  */
934 int xen_bind_pirq_gsi_to_irq(unsigned gsi,
935                              unsigned pirq, int shareable, char *name)
936 {
937         int irq = -1;
938         struct physdev_irq irq_op;
939         int ret;
940
941         mutex_lock(&irq_mapping_update_lock);
942
943         irq = xen_irq_from_gsi(gsi);
944         if (irq != -1) {
945                 pr_info("%s: returning irq %d for gsi %u\n",
946                         __func__, irq, gsi);
947                 goto out;
948         }
949
950         irq = xen_allocate_irq_gsi(gsi);
951         if (irq < 0)
952                 goto out;
953
954         irq_op.irq = irq;
955         irq_op.vector = 0;
956
957         /* Only the privileged domain can do this. For unprivileged domains
958          * the pcifront driver provides a PCI bus which issues this call in
959          * the privileged domain on their behalf. */
960         if (xen_initial_domain() &&
961             HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
962                 xen_free_irq(irq);
963                 irq = -ENOSPC;
964                 goto out;
965         }
966
967         ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
968                                shareable ? PIRQ_SHAREABLE : 0);
969         if (ret < 0) {
970                 __unbind_from_irq(irq);
971                 irq = ret;
972                 goto out;
973         }
974
975         pirq_query_unmask(irq);
976         /* We try to use the handler with the appropriate semantic for the
977          * type of interrupt: if the interrupt is an edge triggered
978          * interrupt we use handle_edge_irq.
979          *
980          * On the other hand if the interrupt is level triggered we use
981          * handle_fasteoi_irq like the native code does for this kind of
982          * interrupts.
983          *
984          * Depending on the Xen version, pirq_needs_eoi might return true
985          * not only for level triggered interrupts but for edge triggered
986          * interrupts too. In any case Xen always honors the eoi mechanism,
987          * not injecting any more pirqs of the same kind if the first one
988          * hasn't received an eoi yet. Therefore using the fasteoi handler
989          * is the right choice either way.
990          */
991         if (shareable)
992                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
993                                 handle_fasteoi_irq, name);
994         else
995                 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
996                                 handle_edge_irq, name);
997
998 out:
999         mutex_unlock(&irq_mapping_update_lock);
1000
1001         return irq;
1002 }
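/*
 * Illustrative use (sketch, values hypothetical): x86 dom0 code maps a GSI
 * to a Linux irq with a call of roughly this shape:
 *
 *	irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, "ioapic-level");
 *	if (irq < 0)
 *		return irq;
 *
 * A shareable (level triggered) GSI gets the fasteoi flow installed above,
 * an exclusive one the edge flow; the event channel itself is only bound
 * when the irq is started up (see __startup_pirq()).
 */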
1003
1004 #ifdef CONFIG_PCI_MSI
1005 int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1006 {
1007         int rc;
1008         struct physdev_get_free_pirq op_get_free_pirq;
1009
1010         op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1011         rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1012
1013         WARN_ONCE(rc == -ENOSYS,
1014                   "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1015
1016         return rc ? -1 : op_get_free_pirq.pirq;
1017 }
1018
1019 int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1020                              int pirq, int nvec, const char *name, domid_t domid)
1021 {
1022         int i, irq, ret;
1023
1024         mutex_lock(&irq_mapping_update_lock);
1025
1026         irq = xen_allocate_irqs_dynamic(nvec);
1027         if (irq < 0)
1028                 goto out;
1029
1030         for (i = 0; i < nvec; i++) {
1031                 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1032
1033                 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1034                                               i == 0 ? 0 : PIRQ_MSI_GROUP);
1035                 if (ret < 0)
1036                         goto error_irq;
1037         }
1038
1039         ret = irq_set_msi_desc(irq, msidesc);
1040         if (ret < 0)
1041                 goto error_irq;
1042 out:
1043         mutex_unlock(&irq_mapping_update_lock);
1044         return irq;
1045 error_irq:
1046         while (nvec--)
1047                 __unbind_from_irq(irq + nvec);
1048         mutex_unlock(&irq_mapping_update_lock);
1049         return ret;
1050 }
1051 #endif
1052
1053 int xen_destroy_irq(int irq)
1054 {
1055         struct physdev_unmap_pirq unmap_irq;
1056         struct irq_info *info = info_for_irq(irq);
1057         int rc = -ENOENT;
1058
1059         mutex_lock(&irq_mapping_update_lock);
1060
1061         /*
1062          * Only the first vector of an MSI group (the one without the
1063          * PIRQ_MSI_GROUP flag) unmaps the PIRQ; skip the unmap for the
1064          * other vectors in the group.
1065          */
1066         if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1067                 unmap_irq.pirq = info->u.pirq.pirq;
1068                 unmap_irq.domid = info->u.pirq.domid;
1069                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1070                 /* If another domain quits without making the pci_disable_msix
1071                  * call, the Xen hypervisor takes care of freeing the PIRQs
1072                  * (free_domain_pirqs).
1073                  */
1074                 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1075                         pr_info("domain %d does not have %d anymore\n",
1076                                 info->u.pirq.domid, info->u.pirq.pirq);
1077                 else if (rc) {
1078                         pr_warn("unmap irq failed %d\n", rc);
1079                         goto out;
1080                 }
1081         }
1082
1083         xen_free_irq(irq);
1084
1085 out:
1086         mutex_unlock(&irq_mapping_update_lock);
1087         return rc;
1088 }
1089
1090 int xen_irq_from_pirq(unsigned pirq)
1091 {
1092         int irq;
1093
1094         struct irq_info *info;
1095
1096         mutex_lock(&irq_mapping_update_lock);
1097
1098         list_for_each_entry(info, &xen_irq_list_head, list) {
1099                 if (info->type != IRQT_PIRQ)
1100                         continue;
1101                 irq = info->irq;
1102                 if (info->u.pirq.pirq == pirq)
1103                         goto out;
1104         }
1105         irq = -1;
1106 out:
1107         mutex_unlock(&irq_mapping_update_lock);
1108
1109         return irq;
1110 }
1111
1112
1113 int xen_pirq_from_irq(unsigned irq)
1114 {
1115         return pirq_from_irq(irq);
1116 }
1117 EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1118
1119 static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
1120 {
1121         int irq;
1122         int ret;
1123
1124         if (evtchn >= xen_evtchn_max_channels())
1125                 return -ENOMEM;
1126
1127         mutex_lock(&irq_mapping_update_lock);
1128
1129         irq = get_evtchn_to_irq(evtchn);
1130
1131         if (irq == -1) {
1132                 irq = xen_allocate_irq_dynamic();
1133                 if (irq < 0)
1134                         goto out;
1135
1136                 irq_set_chip_and_handler_name(irq, chip,
1137                                               handle_edge_irq, "event");
1138
1139                 ret = xen_irq_info_evtchn_setup(irq, evtchn);
1140                 if (ret < 0) {
1141                         __unbind_from_irq(irq);
1142                         irq = ret;
1143                         goto out;
1144                 }
1145                 /*
1146                  * New interdomain events are initially bound to vCPU0. This
1147                  * is required to set up the event channel in the first
1148                  * place and also important for UP guests because the
1149                  * affinity setting is not invoked on them so nothing would
1150                  * bind the channel.
1151                  */
1152                 bind_evtchn_to_cpu(evtchn, 0, false);
1153         } else {
1154                 struct irq_info *info = info_for_irq(irq);
1155                 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1156         }
1157
1158 out:
1159         mutex_unlock(&irq_mapping_update_lock);
1160
1161         return irq;
1162 }
1163
1164 int bind_evtchn_to_irq(evtchn_port_t evtchn)
1165 {
1166         return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
1167 }
1168 EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1169
1170 static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1171 {
1172         struct evtchn_bind_ipi bind_ipi;
1173         evtchn_port_t evtchn;
1174         int ret, irq;
1175
1176         mutex_lock(&irq_mapping_update_lock);
1177
1178         irq = per_cpu(ipi_to_irq, cpu)[ipi];
1179
1180         if (irq == -1) {
1181                 irq = xen_allocate_irq_dynamic();
1182                 if (irq < 0)
1183                         goto out;
1184
1185                 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1186                                               handle_percpu_irq, "ipi");
1187
1188                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1189                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1190                                                 &bind_ipi) != 0)
1191                         BUG();
1192                 evtchn = bind_ipi.port;
1193
1194                 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1195                 if (ret < 0) {
1196                         __unbind_from_irq(irq);
1197                         irq = ret;
1198                         goto out;
1199                 }
1200                 /*
1201                  * Force the affinity mask to the target CPU so proc shows
1202                  * the correct target.
1203                  */
1204                 bind_evtchn_to_cpu(evtchn, cpu, true);
1205         } else {
1206                 struct irq_info *info = info_for_irq(irq);
1207                 WARN_ON(info == NULL || info->type != IRQT_IPI);
1208         }
1209
1210  out:
1211         mutex_unlock(&irq_mapping_update_lock);
1212         return irq;
1213 }
1214
1215 static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
1216                                                evtchn_port_t remote_port,
1217                                                struct irq_chip *chip)
1218 {
1219         struct evtchn_bind_interdomain bind_interdomain;
1220         int err;
1221
1222         bind_interdomain.remote_dom  = remote_domain;
1223         bind_interdomain.remote_port = remote_port;
1224
1225         err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1226                                           &bind_interdomain);
1227
1228         return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1229                                                chip);
1230 }
1231
1232 int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
1233                                            evtchn_port_t remote_port)
1234 {
1235         return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
1236                                                    &xen_lateeoi_chip);
1237 }
1238 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1239
1240 static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1241 {
1242         struct evtchn_status status;
1243         evtchn_port_t port;
1244         int rc = -ENOENT;
1245
1246         memset(&status, 0, sizeof(status));
1247         for (port = 0; port < xen_evtchn_max_channels(); port++) {
1248                 status.dom = DOMID_SELF;
1249                 status.port = port;
1250                 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1251                 if (rc < 0)
1252                         continue;
1253                 if (status.status != EVTCHNSTAT_virq)
1254                         continue;
1255                 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1256                         *evtchn = port;
1257                         break;
1258                 }
1259         }
1260         return rc;
1261 }
1262
1263 /**
1264  * xen_evtchn_nr_channels - number of usable event channel ports
1265  *
1266  * This may be less than the maximum supported by the current
1267  * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1268  * supported.
1269  */
1270 unsigned xen_evtchn_nr_channels(void)
1271 {
1272         return evtchn_ops->nr_channels();
1273 }
1274 EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1275
1276 int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1277 {
1278         struct evtchn_bind_virq bind_virq;
1279         evtchn_port_t evtchn = 0;
1280         int irq, ret;
1281
1282         mutex_lock(&irq_mapping_update_lock);
1283
1284         irq = per_cpu(virq_to_irq, cpu)[virq];
1285
1286         if (irq == -1) {
1287                 irq = xen_allocate_irq_dynamic();
1288                 if (irq < 0)
1289                         goto out;
1290
1291                 if (percpu)
1292                         irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1293                                                       handle_percpu_irq, "virq");
1294                 else
1295                         irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1296                                                       handle_edge_irq, "virq");
1297
1298                 bind_virq.virq = virq;
1299                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1300                 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1301                                                 &bind_virq);
1302                 if (ret == 0)
1303                         evtchn = bind_virq.port;
1304                 else {
1305                         if (ret == -EEXIST)
1306                                 ret = find_virq(virq, cpu, &evtchn);
1307                         BUG_ON(ret < 0);
1308                 }
1309
1310                 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1311                 if (ret < 0) {
1312                         __unbind_from_irq(irq);
1313                         irq = ret;
1314                         goto out;
1315                 }
1316
1317                 /*
1318                  * Force the affinity mask for percpu interrupts so proc
1319                  * shows the correct target.
1320                  */
1321                 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1322         } else {
1323                 struct irq_info *info = info_for_irq(irq);
1324                 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1325         }
1326
1327 out:
1328         mutex_unlock(&irq_mapping_update_lock);
1329
1330         return irq;
1331 }
1332
1333 static void unbind_from_irq(unsigned int irq)
1334 {
1335         mutex_lock(&irq_mapping_update_lock);
1336         __unbind_from_irq(irq);
1337         mutex_unlock(&irq_mapping_update_lock);
1338 }
1339
1340 static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1341                                           irq_handler_t handler,
1342                                           unsigned long irqflags,
1343                                           const char *devname, void *dev_id,
1344                                           struct irq_chip *chip)
1345 {
1346         int irq, retval;
1347
1348         irq = bind_evtchn_to_irq_chip(evtchn, chip);
1349         if (irq < 0)
1350                 return irq;
1351         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1352         if (retval != 0) {
1353                 unbind_from_irq(irq);
1354                 return retval;
1355         }
1356
1357         return irq;
1358 }
1359
1360 int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1361                               irq_handler_t handler,
1362                               unsigned long irqflags,
1363                               const char *devname, void *dev_id)
1364 {
1365         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1366                                               devname, dev_id,
1367                                               &xen_dynamic_chip);
1368 }
1369 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
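/*
 * Usage sketch ("myfront" names invented): a frontend that obtained an
 * event channel port via xenbus typically does
 *
 *	err = bind_evtchn_to_irqhandler(port, myfront_interrupt, 0,
 *					"myfront", info);
 *	if (err < 0)
 *		goto fail;
 *	info->irq = err;
 *	...
 *	unbind_from_irqhandler(info->irq, info);
 *
 * i.e. the returned irq is both the teardown handle and what the handler
 * receives as its first argument.
 */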
1370
1371 int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1372                                       irq_handler_t handler,
1373                                       unsigned long irqflags,
1374                                       const char *devname, void *dev_id)
1375 {
1376         return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1377                                               devname, dev_id,
1378                                               &xen_lateeoi_chip);
1379 }
1380 EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1381
1382 static int bind_interdomain_evtchn_to_irqhandler_chip(
1383                 unsigned int remote_domain, evtchn_port_t remote_port,
1384                 irq_handler_t handler, unsigned long irqflags,
1385                 const char *devname, void *dev_id, struct irq_chip *chip)
1386 {
1387         int irq, retval;
1388
1389         irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
1390                                                   chip);
1391         if (irq < 0)
1392                 return irq;
1393
1394         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1395         if (retval != 0) {
1396                 unbind_from_irq(irq);
1397                 return retval;
1398         }
1399
1400         return irq;
1401 }
1402
1403 int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
1404                                                   evtchn_port_t remote_port,
1405                                                   irq_handler_t handler,
1406                                                   unsigned long irqflags,
1407                                                   const char *devname,
1408                                                   void *dev_id)
1409 {
1410         return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
1411                                 remote_port, handler, irqflags, devname,
1412                                 dev_id, &xen_lateeoi_chip);
1413 }
1414 EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1415
1416 int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1417                             irq_handler_t handler,
1418                             unsigned long irqflags, const char *devname, void *dev_id)
1419 {
1420         int irq, retval;
1421
1422         irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1423         if (irq < 0)
1424                 return irq;
1425         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1426         if (retval != 0) {
1427                 unbind_from_irq(irq);
1428                 return retval;
1429         }
1430
1431         return irq;
1432 }
1433 EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1434
1435 int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1436                            unsigned int cpu,
1437                            irq_handler_t handler,
1438                            unsigned long irqflags,
1439                            const char *devname,
1440                            void *dev_id)
1441 {
1442         int irq, retval;
1443
1444         irq = bind_ipi_to_irq(ipi, cpu);
1445         if (irq < 0)
1446                 return irq;
1447
1448         irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1449         retval = request_irq(irq, handler, irqflags, devname, dev_id);
1450         if (retval != 0) {
1451                 unbind_from_irq(irq);
1452                 return retval;
1453         }
1454
1455         return irq;
1456 }
1457
1458 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1459 {
1460         struct irq_info *info = info_for_irq(irq);
1461
1462         if (WARN_ON(!info))
1463                 return;
1464         free_irq(irq, dev_id);
1465         unbind_from_irq(irq);
1466 }
1467 EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1468
1469 /**
1470  * xen_set_irq_priority() - set an event channel priority.
1471  * @irq:irq bound to an event channel.
1472  * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1473  */
1474 int xen_set_irq_priority(unsigned irq, unsigned priority)
1475 {
1476         struct evtchn_set_priority set_priority;
1477
1478         set_priority.port = evtchn_from_irq(irq);
1479         set_priority.priority = priority;
1480
1481         return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1482                                            &set_priority);
1483 }
1484 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1485
1486 int evtchn_make_refcounted(evtchn_port_t evtchn)
1487 {
1488         int irq = get_evtchn_to_irq(evtchn);
1489         struct irq_info *info;
1490
1491         if (irq == -1)
1492                 return -ENOENT;
1493
1494         info = info_for_irq(irq);
1495
1496         if (!info)
1497                 return -ENOENT;
1498
1499         WARN_ON(info->refcnt != -1);
1500
1501         info->refcnt = 1;
1502
1503         return 0;
1504 }
1505 EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1506
1507 int evtchn_get(evtchn_port_t evtchn)
1508 {
1509         int irq;
1510         struct irq_info *info;
1511         int err = -ENOENT;
1512
1513         if (evtchn >= xen_evtchn_max_channels())
1514                 return -EINVAL;
1515
1516         mutex_lock(&irq_mapping_update_lock);
1517
1518         irq = get_evtchn_to_irq(evtchn);
1519         if (irq == -1)
1520                 goto done;
1521
1522         info = info_for_irq(irq);
1523
1524         if (!info)
1525                 goto done;
1526
1527         err = -EINVAL;
1528         if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1529                 goto done;
1530
1531         info->refcnt++;
1532         err = 0;
1533  done:
1534         mutex_unlock(&irq_mapping_update_lock);
1535
1536         return err;
1537 }
1538 EXPORT_SYMBOL_GPL(evtchn_get);
1539
1540 void evtchn_put(evtchn_port_t evtchn)
1541 {
1542         int irq = get_evtchn_to_irq(evtchn);
1543         if (WARN_ON(irq == -1))
1544                 return;
1545         unbind_from_irq(irq);
1546 }
1547 EXPORT_SYMBOL_GPL(evtchn_put);
1548
1549 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1550 {
1551         int irq;
1552
1553 #ifdef CONFIG_X86
1554         if (unlikely(vector == XEN_NMI_VECTOR)) {
1555                 int rc =  HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1556                                              NULL);
1557                 if (rc < 0)
1558                         printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1559                 return;
1560         }
1561 #endif
1562         irq = per_cpu(ipi_to_irq, cpu)[vector];
1563         BUG_ON(irq < 0);
1564         notify_remote_via_irq(irq);
1565 }
1566
1567 struct evtchn_loop_ctrl {
1568         ktime_t timeout;
1569         unsigned count;
1570         bool defer_eoi;
1571 };
1572
1573 void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1574 {
1575         int irq;
1576         struct irq_info *info;
1577
1578         irq = get_evtchn_to_irq(port);
1579         if (irq == -1)
1580                 return;
1581
1582         /*
1583          * Check for timeout every 256 events.
1584          * We are setting the timeout value only after the first 256
1585          * events in order to not hurt the common case of few loop
1586          * iterations. The 256 is basically an arbitrary value.
1587          *
1588          * In case we are hitting the timeout we need to defer all further
1589          * EOIs in order to ensure to leave the event handling loop rather
1590          * sooner than later.
1591          */
1592         if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1593                 ktime_t kt = ktime_get();
1594
1595                 if (!ctrl->timeout) {
1596                         kt = ktime_add_ms(kt,
1597                                           jiffies_to_msecs(event_loop_timeout));
1598                         ctrl->timeout = kt;
1599                 } else if (kt > ctrl->timeout) {
1600                         ctrl->defer_eoi = true;
1601                 }
1602         }
1603
1604         info = info_for_irq(irq);
1605
1606         if (ctrl->defer_eoi) {
1607                 info->eoi_cpu = smp_processor_id();
1608                 info->irq_epoch = __this_cpu_read(irq_epoch);
1609                 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1610         }
1611
1612         generic_handle_irq(irq);
1613 }
1614
1615 static void __xen_evtchn_do_upcall(void)
1616 {
1617         struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1618         int cpu = smp_processor_id();
1619         struct evtchn_loop_ctrl ctrl = { 0 };
1620
1621         read_lock(&evtchn_rwlock);
1622
1623         do {
1624                 vcpu_info->evtchn_upcall_pending = 0;
1625
1626                 xen_evtchn_handle_events(cpu, &ctrl);
1627
1628                 BUG_ON(!irqs_disabled());
1629
1630                 virt_rmb(); /* Hypervisor can set upcall pending. */
1631
1632         } while (vcpu_info->evtchn_upcall_pending);
1633
1634         read_unlock(&evtchn_rwlock);
1635
1636         /*
1637          * Increment irq_epoch only now to defer EOIs only for
1638          * xen_irq_lateeoi() invocations occurring from inside the loop
1639          * above.
1640          */
1641         __this_cpu_inc(irq_epoch);
1642 }
1643
1644 void xen_evtchn_do_upcall(struct pt_regs *regs)
1645 {
1646         struct pt_regs *old_regs = set_irq_regs(regs);
1647
1648         irq_enter();
1649
1650         __xen_evtchn_do_upcall();
1651
1652         irq_exit();
1653         set_irq_regs(old_regs);
1654 }
1655
1656 void xen_hvm_evtchn_do_upcall(void)
1657 {
1658         __xen_evtchn_do_upcall();
1659 }
1660 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1661
1662 /* Rebind a new event channel to an existing irq. */
1663 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1664 {
1665         struct irq_info *info = info_for_irq(irq);
1666
1667         if (WARN_ON(!info))
1668                 return;
1669
1670         /* Make sure the irq is masked, since the new event channel
1671            will also be masked. */
1672         disable_irq(irq);
1673
1674         mutex_lock(&irq_mapping_update_lock);
1675
1676         /* After resume the irq<->evtchn mappings are all cleared out */
1677         BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1678         /* Expect irq to have been bound before,
1679            so there should be a proper type */
1680         BUG_ON(info->type == IRQT_UNBOUND);
1681
1682         (void)xen_irq_info_evtchn_setup(irq, evtchn);
1683
1684         mutex_unlock(&irq_mapping_update_lock);
1685
1686         bind_evtchn_to_cpu(evtchn, info->cpu, false);
1687
1688         /* Unmask the event channel. */
1689         enable_irq(irq);
1690 }
1691
1692 /* Rebind an evtchn so that it gets delivered to a specific cpu */
1693 static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
1694 {
1695         struct evtchn_bind_vcpu bind_vcpu;
1696         int masked;
1697
1698         if (!VALID_EVTCHN(evtchn))
1699                 return -1;
1700
1701         if (!xen_support_evtchn_rebind())
1702                 return -1;
1703
1704         /* Send future instances of this interrupt to the target vcpu. */
1705         bind_vcpu.port = evtchn;
1706         bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1707
1708         /*
1709          * Mask the event while changing the VCPU binding to prevent
1710          * it being delivered on an unexpected VCPU.
1711          */
1712         masked = test_and_set_mask(evtchn);
1713
1714         /*
1715          * If this fails, it usually just indicates that we're dealing with a
1716          * virq or IPI channel, which don't actually need to be rebound. Ignore
1717          * it, but don't do the xenlinux-level rebind in that case.
1718          */
1719         if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1720                 bind_evtchn_to_cpu(evtchn, tcpu, false);
1721
1722         if (!masked)
1723                 unmask_evtchn(evtchn);
1724
1725         return 0;
1726 }
1727
1728 /*
1729  * Find the CPU within @dest mask which has the least number of channels
1730  * assigned. This is not precise as the per cpu counts can be modified
1731  * concurrently.
1732  */
1733 static unsigned int select_target_cpu(const struct cpumask *dest)
1734 {
1735         unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1736
1737         for_each_cpu_and(cpu, dest, cpu_online_mask) {
1738                 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1739
1740                 if (curch < minch) {
1741                         minch = curch;
1742                         best_cpu = cpu;
1743                 }
1744         }
1745
1746         /*
1747          * Catch the unlikely case that dest contains no online CPUs; the
1748          * fallback to cpu_online_mask cannot recurse further.
1749          */
1750         if (best_cpu == UINT_MAX)
1751                 return select_target_cpu(cpu_online_mask);
1752
1753         return best_cpu;
1754 }
1755
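/*
 * irq_set_affinity callback: pick the least loaded online CPU from the
 * requested mask via select_target_cpu(), rebind the event channel to it
 * and report it back as the effective affinity.
 */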
1756 static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1757                             bool force)
1758 {
1759         unsigned int tcpu = select_target_cpu(dest);
1760         int ret;
1761
1762         ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
1763         if (!ret)
1764                 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1765
1766         return ret;
1767 }
1768
1769 static void enable_dynirq(struct irq_data *data)
1770 {
1771         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
1772
1773         if (VALID_EVTCHN(evtchn))
1774                 unmask_evtchn(evtchn);
1775 }
1776
1777 static void disable_dynirq(struct irq_data *data)
1778 {
1779         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
1780
1781         if (VALID_EVTCHN(evtchn))
1782                 mask_evtchn(evtchn);
1783 }
1784
1785 static void ack_dynirq(struct irq_data *data)
1786 {
1787         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
1788
1789         if (!VALID_EVTCHN(evtchn))
1790                 return;
1791
1792         clear_evtchn(evtchn);
1793 }
1794
1795 static void mask_ack_dynirq(struct irq_data *data)
1796 {
1797         disable_dynirq(data);
1798         ack_dynirq(data);
1799 }
1800
1801 static int retrigger_dynirq(struct irq_data *data)
1802 {
1803         evtchn_port_t evtchn = evtchn_from_irq(data->irq);
1804         int masked;
1805
1806         if (!VALID_EVTCHN(evtchn))
1807                 return 0;
1808
1809         masked = test_and_set_mask(evtchn);
1810         set_evtchn(evtchn);
1811         if (!masked)
1812                 unmask_evtchn(evtchn);
1813
1814         return 1;
1815 }
1816
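/*
 * The restore_* helpers below are used on resume: PIRQs are remapped with
 * PHYSDEVOP_map_pirq and restarted, while per-cpu VIRQs and IPIs get fresh
 * event channels from Xen and have their irq info re-recorded.
 */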
1817 static void restore_pirqs(void)
1818 {
1819         int pirq, rc, irq, gsi;
1820         struct physdev_map_pirq map_irq;
1821         struct irq_info *info;
1822
1823         list_for_each_entry(info, &xen_irq_list_head, list) {
1824                 if (info->type != IRQT_PIRQ)
1825                         continue;
1826
1827                 pirq = info->u.pirq.pirq;
1828                 gsi = info->u.pirq.gsi;
1829                 irq = info->irq;
1830
1831                 /* Save/restore of PT devices doesn't work, so at this point
1832                  * the only devices present are GSI-based emulated devices. */
1833                 if (!gsi)
1834                         continue;
1835
1836                 map_irq.domid = DOMID_SELF;
1837                 map_irq.type = MAP_PIRQ_TYPE_GSI;
1838                 map_irq.index = gsi;
1839                 map_irq.pirq = pirq;
1840
1841                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1842                 if (rc) {
1843                         pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1844                                 gsi, irq, pirq, rc);
1845                         xen_free_irq(irq);
1846                         continue;
1847                 }
1848
1849                 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1850
1851                 __startup_pirq(irq);
1852         }
1853 }
1854
1855 static void restore_cpu_virqs(unsigned int cpu)
1856 {
1857         struct evtchn_bind_virq bind_virq;
1858         evtchn_port_t evtchn;
1859         int virq, irq;
1860
1861         for (virq = 0; virq < NR_VIRQS; virq++) {
1862                 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1863                         continue;
1864
1865                 BUG_ON(virq_from_irq(irq) != virq);
1866
1867                 /* Get a new binding from Xen. */
1868                 bind_virq.virq = virq;
1869                 bind_virq.vcpu = xen_vcpu_nr(cpu);
1870                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1871                                                 &bind_virq) != 0)
1872                         BUG();
1873                 evtchn = bind_virq.port;
1874
1875                 /* Record the new mapping. */
1876                 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1877                 /* The affinity mask is still valid */
1878                 bind_evtchn_to_cpu(evtchn, cpu, false);
1879         }
1880 }
1881
1882 static void restore_cpu_ipis(unsigned int cpu)
1883 {
1884         struct evtchn_bind_ipi bind_ipi;
1885         evtchn_port_t evtchn;
1886         int ipi, irq;
1887
1888         for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
1889                 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
1890                         continue;
1891
1892                 BUG_ON(ipi_from_irq(irq) != ipi);
1893
1894                 /* Get a new binding from Xen. */
1895                 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1896                 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1897                                                 &bind_ipi) != 0)
1898                         BUG();
1899                 evtchn = bind_ipi.port;
1900
1901                 /* Record the new mapping. */
1902                 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1903                 /* The affinity mask is still valid */
1904                 bind_evtchn_to_cpu(evtchn, cpu, false);
1905         }
1906 }
1907
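/*
 * The helpers below manipulate and query the pending state of the event
 * channel behind an irq directly; they are intended to be used together
 * with the polling functions further down.
 */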
1908 /* Clear an irq's pending state, in preparation for polling on it */
1909 void xen_clear_irq_pending(int irq)
1910 {
1911         evtchn_port_t evtchn = evtchn_from_irq(irq);
1912
1913         if (VALID_EVTCHN(evtchn))
1914                 clear_evtchn(evtchn);
1915 }
1916 EXPORT_SYMBOL(xen_clear_irq_pending);
1917 void xen_set_irq_pending(int irq)
1918 {
1919         evtchn_port_t evtchn = evtchn_from_irq(irq);
1920
1921         if (VALID_EVTCHN(evtchn))
1922                 set_evtchn(evtchn);
1923 }
1924
1925 bool xen_test_irq_pending(int irq)
1926 {
1927         evtchn_port_t evtchn = evtchn_from_irq(irq);
1928         bool ret = false;
1929
1930         if (VALID_EVTCHN(evtchn))
1931                 ret = test_evtchn(evtchn);
1932
1933         return ret;
1934 }
1935
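/*
 * Illustrative usage sketch (not taken from this file; "condition_met" is a
 * hypothetical caller-side flag): clear the pending state, re-check the
 * wakeup condition, then block in the hypervisor until the event fires:
 *
 *      xen_clear_irq_pending(irq);
 *      if (!condition_met)
 *              xen_poll_irq(irq);
 */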
1936 /* Poll waiting for an irq to become pending with timeout.  In the usual case,
1937  * the irq will be disabled so it won't deliver an interrupt. */
1938 void xen_poll_irq_timeout(int irq, u64 timeout)
1939 {
1940         evtchn_port_t evtchn = evtchn_from_irq(irq);
1941
1942         if (VALID_EVTCHN(evtchn)) {
1943                 struct sched_poll poll;
1944
1945                 poll.nr_ports = 1;
1946                 poll.timeout = timeout;
1947                 set_xen_guest_handle(poll.ports, &evtchn);
1948
1949                 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
1950                         BUG();
1951         }
1952 }
1953 EXPORT_SYMBOL(xen_poll_irq_timeout);
1954 /* Poll waiting for an irq to become pending.  In the usual case, the
1955  * irq will be disabled so it won't deliver an interrupt. */
1956 void xen_poll_irq(int irq)
1957 {
1958         xen_poll_irq_timeout(irq, 0 /* no timeout */);
1959 }
1960
1961 /* Check whether the IRQ line is shared with other guests. */
1962 int xen_test_irq_shared(int irq)
1963 {
1964         struct irq_info *info = info_for_irq(irq);
1965         struct physdev_irq_status_query irq_status;
1966
1967         if (WARN_ON(!info))
1968                 return -ENOENT;
1969
1970         irq_status.irq = info->u.pirq.pirq;
1971
1972         if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
1973                 return 0;
1974         return !(irq_status.flags & XENIRQSTAT_shared);
1975 }
1976 EXPORT_SYMBOL_GPL(xen_test_irq_shared);
1977
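/*
 * Resume handling: reinitialise the event channel ABI, drop all stale
 * irq <-> evtchn bindings and per-cpu channel accounting, then rebind the
 * per-cpu VIRQs and IPIs and remap the PIRQs.
 */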
1978 void xen_irq_resume(void)
1979 {
1980         unsigned int cpu;
1981         struct irq_info *info;
1982
1983         /* New event-channel space is not 'live' yet. */
1984         xen_evtchn_resume();
1985
1986         /* No IRQ <-> event-channel mappings. */
1987         list_for_each_entry(info, &xen_irq_list_head, list) {
1988                 /* Zap event-channel binding */
1989                 info->evtchn = 0;
1990                 /* Adjust accounting */
1991                 channels_on_cpu_dec(info);
1992         }
1993
1994         clear_evtchn_to_irq_all();
1995
1996         for_each_possible_cpu(cpu) {
1997                 restore_cpu_virqs(cpu);
1998                 restore_cpu_ipis(cpu);
1999         }
2000
2001         restore_pirqs();
2002 }
2003
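/*
 * Two irq chips are provided for dynamic event channels: xen_dynamic_chip
 * clears the event in its ack handler, while xen_lateeoi_chip uses
 * mask_ack_dynirq for both ack and mask_ack, so the channel stays masked
 * until the driver reports completion via xen_irq_lateeoi().
 */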
2004 static struct irq_chip xen_dynamic_chip __read_mostly = {
2005         .name                   = "xen-dyn",
2006
2007         .irq_disable            = disable_dynirq,
2008         .irq_mask               = disable_dynirq,
2009         .irq_unmask             = enable_dynirq,
2010
2011         .irq_ack                = ack_dynirq,
2012         .irq_mask_ack           = mask_ack_dynirq,
2013
2014         .irq_set_affinity       = set_affinity_irq,
2015         .irq_retrigger          = retrigger_dynirq,
2016 };
2017
2018 static struct irq_chip xen_lateeoi_chip __read_mostly = {
2019         /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2020         .name                   = "xen-dyn-lateeoi",
2021
2022         .irq_disable            = disable_dynirq,
2023         .irq_mask               = disable_dynirq,
2024         .irq_unmask             = enable_dynirq,
2025
2026         .irq_ack                = mask_ack_dynirq,
2027         .irq_mask_ack           = mask_ack_dynirq,
2028
2029         .irq_set_affinity       = set_affinity_irq,
2030         .irq_retrigger          = retrigger_dynirq,
2031 };
2032
2033 static struct irq_chip xen_pirq_chip __read_mostly = {
2034         .name                   = "xen-pirq",
2035
2036         .irq_startup            = startup_pirq,
2037         .irq_shutdown           = shutdown_pirq,
2038         .irq_enable             = enable_pirq,
2039         .irq_disable            = disable_pirq,
2040
2041         .irq_mask               = disable_dynirq,
2042         .irq_unmask             = enable_dynirq,
2043
2044         .irq_ack                = eoi_pirq,
2045         .irq_eoi                = eoi_pirq,
2046         .irq_mask_ack           = mask_ack_pirq,
2047
2048         .irq_set_affinity       = set_affinity_irq,
2049
2050         .irq_retrigger          = retrigger_dynirq,
2051 };
2052
2053 static struct irq_chip xen_percpu_chip __read_mostly = {
2054         .name                   = "xen-percpu",
2055
2056         .irq_disable            = disable_dynirq,
2057         .irq_mask               = disable_dynirq,
2058         .irq_unmask             = enable_dynirq,
2059
2060         .irq_ack                = ack_dynirq,
2061 };
2062
2063 #ifdef CONFIG_XEN_PVHVM
2064 /* Vector callbacks are preferable to PCI interrupts for receiving event
2065  * channel notifications because they can be delivered on any vcpu and
2066  * need neither PCI support nor APIC interactions. */
2067 void xen_setup_callback_vector(void)
2068 {
2069         uint64_t callback_via;
2070
2071         if (xen_have_vector_callback) {
2072                 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2073                 if (xen_set_callback_via(callback_via)) {
2074                         pr_err("Request for Xen HVM callback vector failed\n");
2075                         xen_have_vector_callback = 0;
2076                 }
2077         }
2078 }
2079
2080 static __init void xen_alloc_callback_vector(void)
2081 {
2082         if (!xen_have_vector_callback)
2083                 return;
2084
2085         pr_info("Xen HVM callback vector for event delivery is enabled\n");
2086         alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2087 }
2088 #else
2089 void xen_setup_callback_vector(void) {}
2090 static inline void xen_alloc_callback_vector(void) {}
2091 #endif
2092
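/*
 * "xen.fifo_events" selects the FIFO-based event channel ABI (default).
 * xen_init_IRQ() falls back to the 2-level ABI if FIFO initialisation
 * fails or the parameter is set to false.
 */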
2093 bool xen_fifo_events = true;
2094 module_param_named(fifo_events, xen_fifo_events, bool, 0);
2095
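/*
 * CPU hotplug callbacks: set up the per-cpu EOI handling state and let the
 * active event channel ABI run its per-cpu init/deinit hooks.
 */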
2096 static int xen_evtchn_cpu_prepare(unsigned int cpu)
2097 {
2098         int ret = 0;
2099
2100         xen_cpu_init_eoi(cpu);
2101
2102         if (evtchn_ops->percpu_init)
2103                 ret = evtchn_ops->percpu_init(cpu);
2104
2105         return ret;
2106 }
2107
2108 static int xen_evtchn_cpu_dead(unsigned int cpu)
2109 {
2110         int ret = 0;
2111
2112         if (evtchn_ops->percpu_deinit)
2113                 ret = evtchn_ops->percpu_deinit(cpu);
2114
2115         return ret;
2116 }
2117
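/*
 * Boot-time setup: select the event channel ABI (FIFO with 2-level
 * fallback), initialise per-cpu EOI state, register the CPU hotplug
 * callbacks, allocate the evtchn_to_irq translation table, mask all event
 * channels and, on x86, set up the PIRQ and callback-vector specifics.
 */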
2118 void __init xen_init_IRQ(void)
2119 {
2120         int ret = -EINVAL;
2121         evtchn_port_t evtchn;
2122
2123         if (xen_fifo_events)
2124                 ret = xen_evtchn_fifo_init();
2125         if (ret < 0) {
2126                 xen_evtchn_2l_init();
2127                 xen_fifo_events = false;
2128         }
2129
2130         xen_cpu_init_eoi(smp_processor_id());
2131
2132         cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2133                                   "xen/evtchn:prepare",
2134                                   xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2135
2136         evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2137                                 sizeof(*evtchn_to_irq), GFP_KERNEL);
2138         BUG_ON(!evtchn_to_irq);
2139
2140         /* No event channels are 'live' right now. */
2141         for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2142                 mask_evtchn(evtchn);
2143
2144         pirq_needs_eoi = pirq_needs_eoi_flag;
2145
2146 #ifdef CONFIG_X86
2147         if (xen_pv_domain()) {
2148                 if (xen_initial_domain())
2149                         pci_xen_initial_domain();
2150         }
2151         if (xen_feature(XENFEAT_hvm_callback_vector)) {
2152                 xen_setup_callback_vector();
2153                 xen_alloc_callback_vector();
2154         }
2155
2156         if (xen_hvm_domain()) {
2157                 native_init_IRQ();
2158                 /* pci_xen_hvm_init() must be called after native_init_IRQ() so
2159                  * that __acpi_register_gsi can point at the right function. */
2160                 pci_xen_hvm_init();
2161         } else {
2162                 int rc;
2163                 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2164
2165                 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2166                 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2167                 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2168                 if (rc != 0) {
2169                         free_page((unsigned long) pirq_eoi_map);
2170                         pirq_eoi_map = NULL;
2171                 } else
2172                         pirq_needs_eoi = pirq_check_eoi_map;
2173         }
2174 #endif
2175 }