Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[sfrench/cifs-2.6.git] / arch / ia64 / kernel / iosapic.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * I/O SAPIC support.
4  *
5  * Copyright (C) 1999 Intel Corp.
6  * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
7  * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
8  * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
9  *      David Mosberger-Tang <davidm@hpl.hp.com>
10  * Copyright (C) 1999 VA Linux Systems
11  * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
12  *
13  * 00/04/19     D. Mosberger    Rewritten to mirror more closely the x86 I/O
14  *                              APIC code.  In particular, we now have separate
15  *                              handlers for edge and level triggered
16  *                              interrupts.
17  * 00/10/27     Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
18  *                              allocation PCI to vector mapping, shared PCI
19  *                              interrupts.
20  * 00/10/27     D. Mosberger    Document things a bit more to make them more
21  *                              understandable.  Clean up much of the old
22  *                              IOSAPIC cruft.
23  * 01/07/27     J.I. Lee        PCI irq routing, Platform/Legacy interrupts
24  *                              and fixes for ACPI S5(SoftOff) support.
25  * 02/01/23     J.I. Lee        iosapic pgm fixes for PCI irq routing from _PRT
26  * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
27  *                              vectors in iosapic_set_affinity(),
28  *                              initializations for /proc/irq/#/smp_affinity
29  * 02/04/02     P. Diefenbaugh  Cleaned up ACPI PCI IRQ routing.
30  * 02/04/18     J.I. Lee        bug fix in iosapic_init_pci_irq
31  * 02/04/30     J.I. Lee        bug fix in find_iosapic to fix ACPI PCI IRQ to
32  *                              IOSAPIC mapping error
33  * 02/07/29     T. Kochi        Allocate interrupt vectors dynamically
34  * 02/08/04     T. Kochi        Cleaned up terminology (irq, global system
35  *                              interrupt, vector, etc.)
36  * 02/09/20     D. Mosberger    Simplified by taking advantage of ACPI's
37  *                              pci_irq code.
38  * 03/02/19     B. Helgaas      Make pcat_compat system-wide, not per-IOSAPIC.
39  *                              Remove iosapic_address & gsi_base from
40  *                              external interfaces.  Rationalize
41  *                              __init/__devinit attributes.
42  * 04/12/04 Ashok Raj   <ashok.raj@intel.com> Intel Corporation 2004
43  *                              Updated to work with irq migration necessary
44  *                              for CPU Hotplug
45  */
46 /*
47  * Here is what the interrupt logic between a PCI device and the kernel looks
48  * like:
49  *
50  * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
51  *     INTD).  The device is uniquely identified by its bus-, and slot-number
52  *     (the function number does not matter here because all functions share
53  *     the same interrupt lines).
54  *
55  * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
56  *     controller.  Multiple interrupt lines may have to share the same
57  *     IOSAPIC pin (if they're level triggered and use the same polarity).
58  *     Each interrupt line has a unique Global System Interrupt (GSI) number
59  *     which can be calculated as the sum of the controller's base GSI number
60  *     and the IOSAPIC pin number to which the line connects.
61  *
62  * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
63  *     IOSAPIC pin to an IA-64 interrupt vector.  This interrupt vector is
64  *     then sent to the CPU.
65  *
66  * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
67  *     used as architecture-independent interrupt handling mechanism in Linux.
68  *     As an IRQ is a number, we have to have
69  *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
70  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.
71  *
72  * To sum up, there are three levels of mappings involved:
73  *
74  *      PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
75  *
76  * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
77  * describe interrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
78  * (isa_irq) is the only exception in this source code.
79  */
80
81 #include <linux/acpi.h>
82 #include <linux/init.h>
83 #include <linux/irq.h>
84 #include <linux/kernel.h>
85 #include <linux/list.h>
86 #include <linux/pci.h>
87 #include <linux/slab.h>
88 #include <linux/smp.h>
89 #include <linux/string.h>
90 #include <linux/memblock.h>
91
92 #include <asm/delay.h>
93 #include <asm/hw_irq.h>
94 #include <asm/io.h>
95 #include <asm/iosapic.h>
96 #include <asm/processor.h>
97 #include <asm/ptrace.h>
98
/*
 * Interrupt-routing debug output.  DBG() forwards to printk() only when
 * DEBUG_INTERRUPT_ROUTING is defined; the symbol is explicitly undefined
 * right here, so by default DBG() expands to nothing.
 */
#undef DEBUG_INTERRUPT_ROUTING

#if defined(DEBUG_INTERRUPT_ROUTING)
# define DBG(fmt...)	printk(fmt)
#else
# define DBG(fmt...)
#endif
106
107 static DEFINE_SPINLOCK(iosapic_lock);
108
109 /*
110  * These tables map IA-64 vectors to the IOSAPIC pin that generates this
111  * vector.
112  */
113
114 #define NO_REF_RTE      0
115
116 static struct iosapic {
117         char __iomem    *addr;          /* base address of IOSAPIC */
118         unsigned int    gsi_base;       /* GSI base */
119         unsigned short  num_rte;        /* # of RTEs on this IOSAPIC */
120         int             rtes_inuse;     /* # of RTEs in use on this IOSAPIC */
121 #ifdef CONFIG_NUMA
122         unsigned short  node;           /* numa node association via pxm */
123 #endif
124         spinlock_t      lock;           /* lock for indirect reg access */
125 } iosapic_lists[NR_IOSAPICS];
126
127 struct iosapic_rte_info {
128         struct list_head rte_list;      /* RTEs sharing the same vector */
129         char            rte_index;      /* IOSAPIC RTE index */
130         int             refcnt;         /* reference counter */
131         struct iosapic  *iosapic;
132 } ____cacheline_aligned;
133
134 static struct iosapic_intr_info {
135         struct list_head rtes;          /* RTEs using this vector (empty =>
136                                          * not an IOSAPIC interrupt) */
137         int             count;          /* # of registered RTEs */
138         u32             low32;          /* current value of low word of
139                                          * Redirection table entry */
140         unsigned int    dest;           /* destination CPU physical ID */
141         unsigned char   dmode   : 3;    /* delivery mode (see iosapic.h) */
142         unsigned char   polarity: 1;    /* interrupt polarity
143                                          * (see iosapic.h) */
144         unsigned char   trigger : 1;    /* trigger mode (see iosapic.h) */
145 } iosapic_intr_info[NR_IRQS];
146
147 static unsigned char pcat_compat;       /* 8259 compatibility flag */
148
149 static inline void
150 iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val)
151 {
152         unsigned long flags;
153
154         spin_lock_irqsave(&iosapic->lock, flags);
155         __iosapic_write(iosapic->addr, reg, val);
156         spin_unlock_irqrestore(&iosapic->lock, flags);
157 }
158
159 /*
160  * Find an IOSAPIC associated with a GSI
161  */
162 static inline int
163 find_iosapic (unsigned int gsi)
164 {
165         int i;
166
167         for (i = 0; i < NR_IOSAPICS; i++) {
168                 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
169                     iosapic_lists[i].num_rte)
170                         return i;
171         }
172
173         return -1;
174 }
175
176 static inline int __gsi_to_irq(unsigned int gsi)
177 {
178         int irq;
179         struct iosapic_intr_info *info;
180         struct iosapic_rte_info *rte;
181
182         for (irq = 0; irq < NR_IRQS; irq++) {
183                 info = &iosapic_intr_info[irq];
184                 list_for_each_entry(rte, &info->rtes, rte_list)
185                         if (rte->iosapic->gsi_base + rte->rte_index == gsi)
186                                 return irq;
187         }
188         return -1;
189 }
190
191 int
192 gsi_to_irq (unsigned int gsi)
193 {
194         unsigned long flags;
195         int irq;
196
197         spin_lock_irqsave(&iosapic_lock, flags);
198         irq = __gsi_to_irq(gsi);
199         spin_unlock_irqrestore(&iosapic_lock, flags);
200         return irq;
201 }
202
203 static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi)
204 {
205         struct iosapic_rte_info *rte;
206
207         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
208                 if (rte->iosapic->gsi_base + rte->rte_index == gsi)
209                         return rte;
210         return NULL;
211 }
212
213 static void
214 set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask)
215 {
216         unsigned long pol, trigger, dmode;
217         u32 low32, high32;
218         int rte_index;
219         char redir;
220         struct iosapic_rte_info *rte;
221         ia64_vector vector = irq_to_vector(irq);
222
223         DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
224
225         rte = find_rte(irq, gsi);
226         if (!rte)
227                 return;         /* not an IOSAPIC interrupt */
228
229         rte_index = rte->rte_index;
230         pol     = iosapic_intr_info[irq].polarity;
231         trigger = iosapic_intr_info[irq].trigger;
232         dmode   = iosapic_intr_info[irq].dmode;
233
234         redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
235
236 #ifdef CONFIG_SMP
237         set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
238 #endif
239
240         low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
241                  (trigger << IOSAPIC_TRIGGER_SHIFT) |
242                  (dmode << IOSAPIC_DELIVERY_SHIFT) |
243                  ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
244                  vector);
245
246         /* dest contains both id and eid */
247         high32 = (dest << IOSAPIC_DEST_SHIFT);
248
249         iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
250         iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
251         iosapic_intr_info[irq].low32 = low32;
252         iosapic_intr_info[irq].dest = dest;
253 }
254
/*
 * Empty irq_chip callback, used where a hook has nothing to do
 * (see the iosapic_ack_level_irq / iosapic_disable_edge_irq aliases).
 */
static void
iosapic_nop (struct irq_data *data)
{
	/* intentionally empty */
}
260
261
#ifdef CONFIG_KEXEC
/*
 * Walk every RTE of every IRQ, rewrite its low word as "masked + vector"
 * and then issue an EOI for that vector on the owning IOSAPIC.
 */
void
kexec_disable_iosapic(void)
{
	struct iosapic_rte_info *entry;
	struct list_head *head;
	ia64_vector vector;
	int n;

	for (n = 0; n < NR_IRQS; n++) {
		head = &iosapic_intr_info[n].rtes;
		vector = irq_to_vector(n);

		list_for_each_entry(entry, head, rte_list) {
			struct iosapic *sapic = entry->iosapic;

			iosapic_write(sapic, IOSAPIC_RTE_LOW(entry->rte_index),
				      IOSAPIC_MASK|vector);
			iosapic_eoi(sapic->addr, vector);
		}
	}
}
#endif
284
285 static void
286 mask_irq (struct irq_data *data)
287 {
288         unsigned int irq = data->irq;
289         u32 low32;
290         int rte_index;
291         struct iosapic_rte_info *rte;
292
293         if (!iosapic_intr_info[irq].count)
294                 return;                 /* not an IOSAPIC interrupt! */
295
296         /* set only the mask bit */
297         low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
298         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
299                 rte_index = rte->rte_index;
300                 iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
301         }
302 }
303
304 static void
305 unmask_irq (struct irq_data *data)
306 {
307         unsigned int irq = data->irq;
308         u32 low32;
309         int rte_index;
310         struct iosapic_rte_info *rte;
311
312         if (!iosapic_intr_info[irq].count)
313                 return;                 /* not an IOSAPIC interrupt! */
314
315         low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK;
316         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
317                 rte_index = rte->rte_index;
318                 iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
319         }
320 }
321
322
323 static int
324 iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
325                      bool force)
326 {
327 #ifdef CONFIG_SMP
328         unsigned int irq = data->irq;
329         u32 high32, low32;
330         int cpu, dest, rte_index;
331         int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
332         struct iosapic_rte_info *rte;
333         struct iosapic *iosapic;
334
335         irq &= (~IA64_IRQ_REDIRECTED);
336
337         cpu = cpumask_first_and(cpu_online_mask, mask);
338         if (cpu >= nr_cpu_ids)
339                 return -1;
340
341         if (irq_prepare_move(irq, cpu))
342                 return -1;
343
344         dest = cpu_physical_id(cpu);
345
346         if (!iosapic_intr_info[irq].count)
347                 return -1;                      /* not an IOSAPIC interrupt */
348
349         set_irq_affinity_info(irq, dest, redir);
350
351         /* dest contains both id and eid */
352         high32 = dest << IOSAPIC_DEST_SHIFT;
353
354         low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
355         if (redir)
356                 /* change delivery mode to lowest priority */
357                 low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
358         else
359                 /* change delivery mode to fixed */
360                 low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
361         low32 &= IOSAPIC_VECTOR_MASK;
362         low32 |= irq_to_vector(irq);
363
364         iosapic_intr_info[irq].low32 = low32;
365         iosapic_intr_info[irq].dest = dest;
366         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) {
367                 iosapic = rte->iosapic;
368                 rte_index = rte->rte_index;
369                 iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
370                 iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
371         }
372
373 #endif
374         return 0;
375 }
376
/*
 * Handlers for level-triggered interrupts.
 */

/* .irq_startup for level interrupts: unmask the IRQ and return 0. */
static unsigned int
iosapic_startup_level_irq (struct irq_data *data)
{
	unmask_irq(data);

	return 0;
}
387
388 static void
389 iosapic_unmask_level_irq (struct irq_data *data)
390 {
391         unsigned int irq = data->irq;
392         ia64_vector vec = irq_to_vector(irq);
393         struct iosapic_rte_info *rte;
394         int do_unmask_irq = 0;
395
396         irq_complete_move(irq);
397         if (unlikely(irqd_is_setaffinity_pending(data))) {
398                 do_unmask_irq = 1;
399                 mask_irq(data);
400         } else
401                 unmask_irq(data);
402
403         list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list)
404                 iosapic_eoi(rte->iosapic->addr, vec);
405
406         if (unlikely(do_unmask_irq)) {
407                 irq_move_masked_irq(data);
408                 unmask_irq(data);
409         }
410 }
411
412 #define iosapic_shutdown_level_irq      mask_irq
413 #define iosapic_enable_level_irq        unmask_irq
414 #define iosapic_disable_level_irq       mask_irq
415 #define iosapic_ack_level_irq           iosapic_nop
416
417 static struct irq_chip irq_type_iosapic_level = {
418         .name =                 "IO-SAPIC-level",
419         .irq_startup =          iosapic_startup_level_irq,
420         .irq_shutdown =         iosapic_shutdown_level_irq,
421         .irq_enable =           iosapic_enable_level_irq,
422         .irq_disable =          iosapic_disable_level_irq,
423         .irq_ack =              iosapic_ack_level_irq,
424         .irq_mask =             mask_irq,
425         .irq_unmask =           iosapic_unmask_level_irq,
426         .irq_set_affinity =     iosapic_set_affinity
427 };
428
/*
 * Handlers for edge-triggered interrupts.
 */

/* .irq_startup for edge interrupts: unmask the IRQ and return 0. */
static unsigned int
iosapic_startup_edge_irq (struct irq_data *data)
{
	unmask_irq(data);

	/*
	 * The IOSAPIC simply drops interrupts that arrived while the pin
	 * was masked, so there is no way to tell whether one is already
	 * pending.  Report "nothing pending" and hope for the best.
	 */
	return 0;
}
444
445 static void
446 iosapic_ack_edge_irq (struct irq_data *data)
447 {
448         irq_complete_move(data->irq);
449         irq_move_irq(data);
450 }
451
452 #define iosapic_enable_edge_irq         unmask_irq
453 #define iosapic_disable_edge_irq        iosapic_nop
454
455 static struct irq_chip irq_type_iosapic_edge = {
456         .name =                 "IO-SAPIC-edge",
457         .irq_startup =          iosapic_startup_edge_irq,
458         .irq_shutdown =         iosapic_disable_edge_irq,
459         .irq_enable =           iosapic_enable_edge_irq,
460         .irq_disable =          iosapic_disable_edge_irq,
461         .irq_ack =              iosapic_ack_edge_irq,
462         .irq_mask =             mask_irq,
463         .irq_unmask =           unmask_irq,
464         .irq_set_affinity =     iosapic_set_affinity
465 };
466
467 static unsigned int
468 iosapic_version (char __iomem *addr)
469 {
470         /*
471          * IOSAPIC Version Register return 32 bit structure like:
472          * {
473          *      unsigned int version   : 8;
474          *      unsigned int reserved1 : 8;
475          *      unsigned int max_redir : 8;
476          *      unsigned int reserved2 : 8;
477          * }
478          */
479         return __iosapic_read(addr, IOSAPIC_VERSION);
480 }
481
482 static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol)
483 {
484         int i, irq = -ENOSPC, min_count = -1;
485         struct iosapic_intr_info *info;
486
487         /*
488          * shared vectors for edge-triggered interrupts are not
489          * supported yet
490          */
491         if (trigger == IOSAPIC_EDGE)
492                 return -EINVAL;
493
494         for (i = 0; i < NR_IRQS; i++) {
495                 info = &iosapic_intr_info[i];
496                 if (info->trigger == trigger && info->polarity == pol &&
497                     (info->dmode == IOSAPIC_FIXED ||
498                      info->dmode == IOSAPIC_LOWEST_PRIORITY) &&
499                     can_request_irq(i, IRQF_SHARED)) {
500                         if (min_count == -1 || info->count < min_count) {
501                                 irq = i;
502                                 min_count = info->count;
503                         }
504                 }
505         }
506         return irq;
507 }
508
509 /*
510  * if the given vector is already owned by other,
511  *  assign a new vector for the other and make the vector available
512  */
513 static void __init
514 iosapic_reassign_vector (int irq)
515 {
516         int new_irq;
517
518         if (iosapic_intr_info[irq].count) {
519                 new_irq = create_irq();
520                 if (new_irq < 0)
521                         panic("%s: out of interrupt vectors!\n", __func__);
522                 printk(KERN_INFO "Reassigning vector %d to %d\n",
523                        irq_to_vector(irq), irq_to_vector(new_irq));
524                 memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq],
525                        sizeof(struct iosapic_intr_info));
526                 INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes);
527                 list_move(iosapic_intr_info[irq].rtes.next,
528                           &iosapic_intr_info[new_irq].rtes);
529                 memset(&iosapic_intr_info[irq], 0,
530                        sizeof(struct iosapic_intr_info));
531                 iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
532                 INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
533         }
534 }
535
536 static inline int irq_is_shared (int irq)
537 {
538         return (iosapic_intr_info[irq].count > 1);
539 }
540
541 struct irq_chip*
542 ia64_native_iosapic_get_irq_chip(unsigned long trigger)
543 {
544         if (trigger == IOSAPIC_EDGE)
545                 return &irq_type_iosapic_edge;
546         else
547                 return &irq_type_iosapic_level;
548 }
549
550 static int
551 register_intr (unsigned int gsi, int irq, unsigned char delivery,
552                unsigned long polarity, unsigned long trigger)
553 {
554         struct irq_chip *chip, *irq_type;
555         int index;
556         struct iosapic_rte_info *rte;
557
558         index = find_iosapic(gsi);
559         if (index < 0) {
560                 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
561                        __func__, gsi);
562                 return -ENODEV;
563         }
564
565         rte = find_rte(irq, gsi);
566         if (!rte) {
567                 rte = kzalloc(sizeof (*rte), GFP_ATOMIC);
568                 if (!rte) {
569                         printk(KERN_WARNING "%s: cannot allocate memory\n",
570                                __func__);
571                         return -ENOMEM;
572                 }
573
574                 rte->iosapic    = &iosapic_lists[index];
575                 rte->rte_index  = gsi - rte->iosapic->gsi_base;
576                 rte->refcnt++;
577                 list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes);
578                 iosapic_intr_info[irq].count++;
579                 iosapic_lists[index].rtes_inuse++;
580         }
581         else if (rte->refcnt == NO_REF_RTE) {
582                 struct iosapic_intr_info *info = &iosapic_intr_info[irq];
583                 if (info->count > 0 &&
584                     (info->trigger != trigger || info->polarity != polarity)){
585                         printk (KERN_WARNING
586                                 "%s: cannot override the interrupt\n",
587                                 __func__);
588                         return -EINVAL;
589                 }
590                 rte->refcnt++;
591                 iosapic_intr_info[irq].count++;
592                 iosapic_lists[index].rtes_inuse++;
593         }
594
595         iosapic_intr_info[irq].polarity = polarity;
596         iosapic_intr_info[irq].dmode    = delivery;
597         iosapic_intr_info[irq].trigger  = trigger;
598
599         irq_type = iosapic_get_irq_chip(trigger);
600
601         chip = irq_get_chip(irq);
602         if (irq_type != NULL && chip != irq_type) {
603                 if (chip != &no_irq_chip)
604                         printk(KERN_WARNING
605                                "%s: changing vector %d from %s to %s\n",
606                                __func__, irq_to_vector(irq),
607                                chip->name, irq_type->name);
608                 chip = irq_type;
609         }
610         irq_set_chip_handler_name_locked(irq_get_irq_data(irq), chip,
611                 trigger == IOSAPIC_EDGE ? handle_edge_irq : handle_level_irq,
612                 NULL);
613         return 0;
614 }
615
/*
 * Choose the physical id of the CPU that should receive @irq (for @gsi).
 *
 * On SMP the choice is, in order: the destination already in use when the
 * irq has registered RTEs; the current CPU when XTP redirection is enabled
 * or the current CPU is not yet online; the CPEI target for the CPE polling
 * vector; with CONFIG_NUMA an online CPU on the IOSAPIC's node that is also
 * in the irq's domain; otherwise the next online CPU of the domain in
 * round-robin order.  On UP it is always the current CPU.
 */
static unsigned int
get_target_cpu (unsigned int gsi, int irq)
{
#ifdef CONFIG_SMP
	static int rr_cpu = -1;		/* round-robin cursor, kept across calls */
	extern int cpe_vector;
	cpumask_t domain = irq_to_domain(irq);

	/*
	 * In case of vector shared by multiple RTEs, all RTEs that
	 * share the vector need to use the same destination CPU.
	 */
	if (iosapic_intr_info[irq].count)
		return iosapic_intr_info[irq].dest;

	/*
	 * If the platform supports redirection via XTP, let it
	 * distribute interrupts.
	 */
	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
		return cpu_physical_id(smp_processor_id());

	/*
	 * Some interrupts (ACPI SCI, for instance) are registered
	 * before the BSP is marked as online.
	 */
	if (!cpu_online(smp_processor_id()))
		return cpu_physical_id(smp_processor_id());

	if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR)
		return get_cpei_target_cpu();

#ifdef CONFIG_NUMA
	{
		int nr_candidates, wanted, sapic_idx, candidate, seen = 0;
		const struct cpumask *node_mask;

		sapic_idx = find_iosapic(gsi);
		if (sapic_idx < 0 ||
		    iosapic_lists[sapic_idx].node == MAX_NUMNODES)
			goto skip_numa_setup;

		node_mask = cpumask_of_node(iosapic_lists[sapic_idx].node);

		/* count the online CPUs that are on the node and in the domain */
		nr_candidates = 0;
		for_each_cpu_and(candidate, node_mask, &domain) {
			if (cpu_online(candidate))
				nr_candidates++;
		}

		if (nr_candidates == 0)
			goto skip_numa_setup;

		/* Use irq assignment to distribute across cpus in node */
		wanted = irq % nr_candidates;

		/* stop at the wanted-th online candidate */
		for_each_cpu_and(candidate, node_mask, &domain) {
			if (!cpu_online(candidate))
				continue;
			if (seen++ >= wanted)
				break;
		}

		if (candidate < nr_cpu_ids)
			return cpu_physical_id(candidate);
	}
skip_numa_setup:
#endif
	/*
	 * Otherwise, round-robin interrupt vectors across all the
	 * processors.  (It'd be nice if we could be smarter in the
	 * case of NUMA.)
	 */
	do {
		rr_cpu++;
		if (rr_cpu >= nr_cpu_ids)
			rr_cpu = 0;
	} while (!cpu_online(rr_cpu) || !cpumask_test_cpu(rr_cpu, &domain));

	return cpu_physical_id(rr_cpu);
#else  /* CONFIG_SMP */
	return cpu_physical_id(smp_processor_id());
#endif
}
695
696 static inline unsigned char choose_dmode(void)
697 {
698 #ifdef CONFIG_SMP
699         if (smp_int_redirect & SMP_IRQ_REDIRECTION)
700                 return IOSAPIC_LOWEST_PRIORITY;
701 #endif
702         return IOSAPIC_FIXED;
703 }
704
705 /*
706  * ACPI can describe IOSAPIC interrupts via static tables and namespace
707  * methods.  This provides an interface to register those interrupts and
708  * program the IOSAPIC RTE.
709  */
710 int
711 iosapic_register_intr (unsigned int gsi,
712                        unsigned long polarity, unsigned long trigger)
713 {
714         int irq, mask = 1, err;
715         unsigned int dest;
716         unsigned long flags;
717         struct iosapic_rte_info *rte;
718         u32 low32;
719         unsigned char dmode;
720         struct irq_desc *desc;
721
722         /*
723          * If this GSI has already been registered (i.e., it's a
724          * shared interrupt, or we lost a race to register it),
725          * don't touch the RTE.
726          */
727         spin_lock_irqsave(&iosapic_lock, flags);
728         irq = __gsi_to_irq(gsi);
729         if (irq > 0) {
730                 rte = find_rte(irq, gsi);
731                 if(iosapic_intr_info[irq].count == 0) {
732                         assign_irq_vector(irq);
733                         irq_init_desc(irq);
734                 } else if (rte->refcnt != NO_REF_RTE) {
735                         rte->refcnt++;
736                         goto unlock_iosapic_lock;
737                 }
738         } else
739                 irq = create_irq();
740
741         /* If vector is running out, we try to find a sharable vector */
742         if (irq < 0) {
743                 irq = iosapic_find_sharable_irq(trigger, polarity);
744                 if (irq < 0)
745                         goto unlock_iosapic_lock;
746         }
747
748         desc = irq_to_desc(irq);
749         raw_spin_lock(&desc->lock);
750         dest = get_target_cpu(gsi, irq);
751         dmode = choose_dmode();
752         err = register_intr(gsi, irq, dmode, polarity, trigger);
753         if (err < 0) {
754                 raw_spin_unlock(&desc->lock);
755                 irq = err;
756                 goto unlock_iosapic_lock;
757         }
758
759         /*
760          * If the vector is shared and already unmasked for other
761          * interrupt sources, don't mask it.
762          */
763         low32 = iosapic_intr_info[irq].low32;
764         if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK))
765                 mask = 0;
766         set_rte(gsi, irq, dest, mask);
767
768         printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
769                gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
770                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
771                cpu_logical_id(dest), dest, irq_to_vector(irq));
772
773         raw_spin_unlock(&desc->lock);
774  unlock_iosapic_lock:
775         spin_unlock_irqrestore(&iosapic_lock, flags);
776         return irq;
777 }
778
779 void
780 iosapic_unregister_intr (unsigned int gsi)
781 {
782         unsigned long flags;
783         int irq, index;
784         u32 low32;
785         unsigned long trigger, polarity;
786         unsigned int dest;
787         struct iosapic_rte_info *rte;
788
789         /*
790          * If the irq associated with the gsi is not found,
791          * iosapic_unregister_intr() is unbalanced. We need to check
792          * this again after getting locks.
793          */
794         irq = gsi_to_irq(gsi);
795         if (irq < 0) {
796                 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
797                        gsi);
798                 WARN_ON(1);
799                 return;
800         }
801
802         spin_lock_irqsave(&iosapic_lock, flags);
803         if ((rte = find_rte(irq, gsi)) == NULL) {
804                 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
805                        gsi);
806                 WARN_ON(1);
807                 goto out;
808         }
809
810         if (--rte->refcnt > 0)
811                 goto out;
812
813         rte->refcnt = NO_REF_RTE;
814
815         /* Mask the interrupt */
816         low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK;
817         iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32);
818
819         iosapic_intr_info[irq].count--;
820         index = find_iosapic(gsi);
821         iosapic_lists[index].rtes_inuse--;
822         WARN_ON(iosapic_lists[index].rtes_inuse < 0);
823
824         trigger  = iosapic_intr_info[irq].trigger;
825         polarity = iosapic_intr_info[irq].polarity;
826         dest     = iosapic_intr_info[irq].dest;
827         printk(KERN_INFO
828                "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n",
829                gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
830                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
831                cpu_logical_id(dest), dest, irq_to_vector(irq));
832
833         if (iosapic_intr_info[irq].count == 0) {
834 #ifdef CONFIG_SMP
835                 /* Clear affinity */
836                 cpumask_setall(irq_get_affinity_mask(irq));
837 #endif
838                 /* Clear the interrupt information */
839                 iosapic_intr_info[irq].dest = 0;
840                 iosapic_intr_info[irq].dmode = 0;
841                 iosapic_intr_info[irq].polarity = 0;
842                 iosapic_intr_info[irq].trigger = 0;
843                 iosapic_intr_info[irq].low32 |= IOSAPIC_MASK;
844
845                 /* Destroy and reserve IRQ */
846                 destroy_and_reserve_irq(irq);
847         }
848  out:
849         spin_unlock_irqrestore(&iosapic_lock, flags);
850 }
851
852 /*
853  * ACPI calls this when it finds an entry for a platform interrupt.
854  */
855 int __init
856 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
857                                 int iosapic_vector, u16 eid, u16 id,
858                                 unsigned long polarity, unsigned long trigger)
859 {
860         static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
861         unsigned char delivery;
862         int irq, vector, mask = 0;
863         unsigned int dest = ((id << 8) | eid) & 0xffff;
864
865         switch (int_type) {
866               case ACPI_INTERRUPT_PMI:
867                 irq = vector = iosapic_vector;
868                 bind_irq_vector(irq, vector, CPU_MASK_ALL);
869                 /*
870                  * since PMI vector is alloc'd by FW(ACPI) not by kernel,
871                  * we need to make sure the vector is available
872                  */
873                 iosapic_reassign_vector(irq);
874                 delivery = IOSAPIC_PMI;
875                 break;
876               case ACPI_INTERRUPT_INIT:
877                 irq = create_irq();
878                 if (irq < 0)
879                         panic("%s: out of interrupt vectors!\n", __func__);
880                 vector = irq_to_vector(irq);
881                 delivery = IOSAPIC_INIT;
882                 break;
883               case ACPI_INTERRUPT_CPEI:
884                 irq = vector = IA64_CPE_VECTOR;
885                 BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
886                 delivery = IOSAPIC_FIXED;
887                 mask = 1;
888                 break;
889               default:
890                 printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__,
891                        int_type);
892                 return -1;
893         }
894
895         register_intr(gsi, irq, delivery, polarity, trigger);
896
897         printk(KERN_INFO
898                "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
899                " vector %d\n",
900                int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
901                int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
902                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
903                cpu_logical_id(dest), dest, vector);
904
905         set_rte(gsi, irq, dest, mask);
906         return vector;
907 }
908
909 /*
910  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
911  */
912 void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi,
913                               unsigned long polarity, unsigned long trigger)
914 {
915         int vector, irq;
916         unsigned int dest = cpu_physical_id(smp_processor_id());
917         unsigned char dmode;
918
919         irq = vector = isa_irq_to_vector(isa_irq);
920         BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL));
921         dmode = choose_dmode();
922         register_intr(gsi, irq, dmode, polarity, trigger);
923
924         DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
925             isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
926             polarity == IOSAPIC_POL_HIGH ? "high" : "low",
927             cpu_logical_id(dest), dest, vector);
928
929         set_rte(gsi, irq, dest, 1);
930 }
931
932 void __init
933 ia64_native_iosapic_pcat_compat_init(void)
934 {
935         if (pcat_compat) {
936                 /*
937                  * Disable the compatibility mode interrupts (8259 style),
938                  * needs IN/OUT support enabled.
939                  */
940                 printk(KERN_INFO
941                        "%s: Disabling PC-AT compatible 8259 interrupts\n",
942                        __func__);
943                 outb(0xff, 0xA1);
944                 outb(0xff, 0x21);
945         }
946 }
947
948 void __init
949 iosapic_system_init (int system_pcat_compat)
950 {
951         int irq;
952
953         for (irq = 0; irq < NR_IRQS; ++irq) {
954                 iosapic_intr_info[irq].low32 = IOSAPIC_MASK;
955                 /* mark as unused */
956                 INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes);
957
958                 iosapic_intr_info[irq].count = 0;
959         }
960
961         pcat_compat = system_pcat_compat;
962         if (pcat_compat)
963                 iosapic_pcat_compat_init();
964 }
965
966 static inline int
967 iosapic_alloc (void)
968 {
969         int index;
970
971         for (index = 0; index < NR_IOSAPICS; index++)
972                 if (!iosapic_lists[index].addr)
973                         return index;
974
975         printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__);
976         return -1;
977 }
978
979 static inline void
980 iosapic_free (int index)
981 {
982         memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
983 }
984
985 static inline int
986 iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
987 {
988         int index;
989         unsigned int gsi_end, base, end;
990
991         /* check gsi range */
992         gsi_end = gsi_base + ((ver >> 16) & 0xff);
993         for (index = 0; index < NR_IOSAPICS; index++) {
994                 if (!iosapic_lists[index].addr)
995                         continue;
996
997                 base = iosapic_lists[index].gsi_base;
998                 end  = base + iosapic_lists[index].num_rte - 1;
999
1000                 if (gsi_end < base || end < gsi_base)
1001                         continue; /* OK */
1002
1003                 return -EBUSY;
1004         }
1005         return 0;
1006 }
1007
1008 static int
1009 iosapic_delete_rte(unsigned int irq, unsigned int gsi)
1010 {
1011         struct iosapic_rte_info *rte, *temp;
1012
1013         list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes,
1014                                                                 rte_list) {
1015                 if (rte->iosapic->gsi_base + rte->rte_index == gsi) {
1016                         if (rte->refcnt)
1017                                 return -EBUSY;
1018
1019                         list_del(&rte->rte_list);
1020                         kfree(rte);
1021                         return 0;
1022                 }
1023         }
1024
1025         return -EINVAL;
1026 }
1027
1028 int iosapic_init(unsigned long phys_addr, unsigned int gsi_base)
1029 {
1030         int num_rte, err, index;
1031         unsigned int isa_irq, ver;
1032         char __iomem *addr;
1033         unsigned long flags;
1034
1035         spin_lock_irqsave(&iosapic_lock, flags);
1036         index = find_iosapic(gsi_base);
1037         if (index >= 0) {
1038                 spin_unlock_irqrestore(&iosapic_lock, flags);
1039                 return -EBUSY;
1040         }
1041
1042         addr = ioremap(phys_addr, 0);
1043         if (addr == NULL) {
1044                 spin_unlock_irqrestore(&iosapic_lock, flags);
1045                 return -ENOMEM;
1046         }
1047         ver = iosapic_version(addr);
1048         if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1049                 iounmap(addr);
1050                 spin_unlock_irqrestore(&iosapic_lock, flags);
1051                 return err;
1052         }
1053
1054         /*
1055          * The MAX_REDIR register holds the highest input pin number
1056          * (starting from 0).  We add 1 so that we can use it for
1057          * number of pins (= RTEs)
1058          */
1059         num_rte = ((ver >> 16) & 0xff) + 1;
1060
1061         index = iosapic_alloc();
1062         iosapic_lists[index].addr = addr;
1063         iosapic_lists[index].gsi_base = gsi_base;
1064         iosapic_lists[index].num_rte = num_rte;
1065 #ifdef CONFIG_NUMA
1066         iosapic_lists[index].node = MAX_NUMNODES;
1067 #endif
1068         spin_lock_init(&iosapic_lists[index].lock);
1069         spin_unlock_irqrestore(&iosapic_lock, flags);
1070
1071         if ((gsi_base == 0) && pcat_compat) {
1072                 /*
1073                  * Map the legacy ISA devices into the IOSAPIC data.  Some of
1074                  * these may get reprogrammed later on with data from the ACPI
1075                  * Interrupt Source Override table.
1076                  */
1077                 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
1078                         iosapic_override_isa_irq(isa_irq, isa_irq,
1079                                                  IOSAPIC_POL_HIGH,
1080                                                  IOSAPIC_EDGE);
1081         }
1082         return 0;
1083 }
1084
1085 int iosapic_remove(unsigned int gsi_base)
1086 {
1087         int i, irq, index, err = 0;
1088         unsigned long flags;
1089
1090         spin_lock_irqsave(&iosapic_lock, flags);
1091         index = find_iosapic(gsi_base);
1092         if (index < 0) {
1093                 printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
1094                        __func__, gsi_base);
1095                 goto out;
1096         }
1097
1098         if (iosapic_lists[index].rtes_inuse) {
1099                 err = -EBUSY;
1100                 printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n",
1101                        __func__, gsi_base);
1102                 goto out;
1103         }
1104
1105         for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) {
1106                 irq = __gsi_to_irq(i);
1107                 if (irq < 0)
1108                         continue;
1109
1110                 err = iosapic_delete_rte(irq, i);
1111                 if (err)
1112                         goto out;
1113         }
1114
1115         iounmap(iosapic_lists[index].addr);
1116         iosapic_free(index);
1117  out:
1118         spin_unlock_irqrestore(&iosapic_lock, flags);
1119         return err;
1120 }
1121
1122 #ifdef CONFIG_NUMA
1123 void map_iosapic_to_node(unsigned int gsi_base, int node)
1124 {
1125         int index;
1126
1127         index = find_iosapic(gsi_base);
1128         if (index < 0) {
1129                 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
1130                        __func__, gsi_base);
1131                 return;
1132         }
1133         iosapic_lists[index].node = node;
1134         return;
1135 }
1136 #endif