Merge master.kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6
[sfrench/cifs-2.6.git] / arch / ia64 / kernel / iosapic.c
1 /*
2  * I/O SAPIC support.
3  *
4  * Copyright (C) 1999 Intel Corp.
5  * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
6  * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
7  * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
8  *      David Mosberger-Tang <davidm@hpl.hp.com>
9  * Copyright (C) 1999 VA Linux Systems
10  * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
11  *
12  * 00/04/19     D. Mosberger    Rewritten to mirror more closely the x86 I/O
13  *                              APIC code.  In particular, we now have separate
14  *                              handlers for edge and level triggered
15  *                              interrupts.
16  * 00/10/27     Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
17  *                              allocation PCI to vector mapping, shared PCI
18  *                              interrupts.
19  * 00/10/27     D. Mosberger    Document things a bit more to make them more
20  *                              understandable.  Clean up much of the old
21  *                              IOSAPIC cruft.
22  * 01/07/27     J.I. Lee        PCI irq routing, Platform/Legacy interrupts
23  *                              and fixes for ACPI S5(SoftOff) support.
24  * 02/01/23     J.I. Lee        iosapic pgm fixes for PCI irq routing from _PRT
25  * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
26  *                              vectors in iosapic_set_affinity(),
27  *                              initializations for /proc/irq/#/smp_affinity
28  * 02/04/02     P. Diefenbaugh  Cleaned up ACPI PCI IRQ routing.
29  * 02/04/18     J.I. Lee        bug fix in iosapic_init_pci_irq
30  * 02/04/30     J.I. Lee        bug fix in find_iosapic to fix ACPI PCI IRQ to
31  *                              IOSAPIC mapping error
32  * 02/07/29     T. Kochi        Allocate interrupt vectors dynamically
33  * 02/08/04     T. Kochi        Cleaned up terminology (irq, global system
34  *                              interrupt, vector, etc.)
35  * 02/09/20     D. Mosberger    Simplified by taking advantage of ACPI's
36  *                              pci_irq code.
37  * 03/02/19     B. Helgaas      Make pcat_compat system-wide, not per-IOSAPIC.
38  *                              Remove iosapic_address & gsi_base from
39  *                              external interfaces.  Rationalize
40  *                              __init/__devinit attributes.
41  * 04/12/04 Ashok Raj   <ashok.raj@intel.com> Intel Corporation 2004
42  *                              Updated to work with irq migration necessary
43  *                              for CPU Hotplug
44  */
45 /*
46  * Here is what the interrupt logic between a PCI device and the kernel looks
47  * like:
48  *
49  * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
50  *     INTD).  The device is uniquely identified by its bus-, and slot-number
51  *     (the function number does not matter here because all functions share
52  *     the same interrupt lines).
53  *
54  * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
55  *     controller.  Multiple interrupt lines may have to share the same
56  *     IOSAPIC pin (if they're level triggered and use the same polarity).
57  *     Each interrupt line has a unique Global System Interrupt (GSI) number
58  *     which can be calculated as the sum of the controller's base GSI number
59  *     and the IOSAPIC pin number to which the line connects.
60  *
61  * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
62  *     IOSAPIC pin into the IA-64 interrupt vector.  This interrupt vector is
63  *     then sent to the CPU.
64  *
65  * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
66  *     used as architecture-independent interrupt handling mechanism in Linux.
67  *     As an IRQ is a number, we have to have
68  *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
69  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
70  *     platform can implement platform_irq_to_vector(irq) and
71  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
72  *     Please see also include/asm-ia64/hw_irq.h for those APIs.
73  *
74  * To sum up, there are three levels of mappings involved:
75  *
76  *      PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
77  *
78  * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
79  * describe interrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
80  * (isa_irq) is the only exception in this source code.
81  */
82
83 #include <linux/acpi.h>
84 #include <linux/init.h>
85 #include <linux/irq.h>
86 #include <linux/kernel.h>
87 #include <linux/list.h>
88 #include <linux/pci.h>
89 #include <linux/smp.h>
90 #include <linux/string.h>
91 #include <linux/bootmem.h>
92
93 #include <asm/delay.h>
94 #include <asm/hw_irq.h>
95 #include <asm/io.h>
96 #include <asm/iosapic.h>
97 #include <asm/machvec.h>
98 #include <asm/processor.h>
99 #include <asm/ptrace.h>
100 #include <asm/system.h>
101
/* Turn the #undef below into a #define to get interrupt-routing traces. */
#undef DEBUG_INTERRUPT_ROUTING

#ifndef DEBUG_INTERRUPT_ROUTING
# define DBG(fmt...)
#else
# define DBG(fmt...)	printk(fmt)
#endif

/* How many RTE descriptors fit into one page worth of memory. */
#define NR_PREALLOCATE_RTE_ENTRIES \
	(PAGE_SIZE / sizeof(struct iosapic_rte_info))
/* Bit kept in iosapic_rte_info.flags for descriptors taken from the free list. */
#define RTE_PREALLOCATED	(1)

/* Taken around the RTE table updates and IOSAPIC register writes below. */
static DEFINE_SPINLOCK(iosapic_lock);
115
116 /*
117  * These tables map IA-64 vectors to the IOSAPIC pin that generates this
118  * vector.
119  */
120
121 struct iosapic_rte_info {
122         struct list_head rte_list;      /* node in list of RTEs sharing the
123                                          * same vector */
124         char __iomem    *addr;          /* base address of IOSAPIC */
125         unsigned int    gsi_base;       /* first GSI assigned to this
126                                          * IOSAPIC */
127         char            rte_index;      /* IOSAPIC RTE index */
128         int             refcnt;         /* reference counter */
129         unsigned int    flags;          /* flags */
130 } ____cacheline_aligned;
131
132 static struct iosapic_intr_info {
133         struct list_head rtes;          /* RTEs using this vector (empty =>
134                                          * not an IOSAPIC interrupt) */
135         int             count;          /* # of RTEs that shares this vector */
136         u32             low32;          /* current value of low word of
137                                          * Redirection table entry */
138         unsigned int    dest;           /* destination CPU physical ID */
139         unsigned char   dmode   : 3;    /* delivery mode (see iosapic.h) */
140         unsigned char   polarity: 1;    /* interrupt polarity
141                                          * (see iosapic.h) */
142         unsigned char   trigger : 1;    /* trigger mode (see iosapic.h) */
143 } iosapic_intr_info[IA64_NUM_VECTORS];
144
145 static struct iosapic {
146         char __iomem    *addr;          /* base address of IOSAPIC */
147         unsigned int    gsi_base;       /* first GSI assigned to this
148                                          * IOSAPIC */
149         unsigned short  num_rte;        /* # of RTEs on this IOSAPIC */
150         int             rtes_inuse;     /* # of RTEs in use on this IOSAPIC */
151 #ifdef CONFIG_NUMA
152         unsigned short  node;           /* numa node association via pxm */
153 #endif
154 } iosapic_lists[NR_IOSAPICS];
155
156 static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
157
158 static int iosapic_kmalloc_ok;
159 static LIST_HEAD(free_rte_list);
160
161 /*
162  * Find an IOSAPIC associated with a GSI
163  */
164 static inline int
165 find_iosapic (unsigned int gsi)
166 {
167         int i;
168
169         for (i = 0; i < NR_IOSAPICS; i++) {
170                 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
171                     iosapic_lists[i].num_rte)
172                         return i;
173         }
174
175         return -1;
176 }
177
178 static inline int
179 _gsi_to_vector (unsigned int gsi)
180 {
181         struct iosapic_intr_info *info;
182         struct iosapic_rte_info *rte;
183
184         for (info = iosapic_intr_info; info <
185                      iosapic_intr_info + IA64_NUM_VECTORS; ++info)
186                 list_for_each_entry(rte, &info->rtes, rte_list)
187                         if (rte->gsi_base + rte->rte_index == gsi)
188                                 return info - iosapic_intr_info;
189         return -1;
190 }
191
/*
 * Map a GSI number to its IA-64 interrupt vector.  Returns -1 when no
 * entry exists for the GSI.  Takes no lock itself.
 */
inline int
gsi_to_vector (unsigned int gsi)
{
	int vector = _gsi_to_vector(gsi);

	return vector;
}
201
202 int
203 gsi_to_irq (unsigned int gsi)
204 {
205         unsigned long flags;
206         int irq;
207         /*
208          * XXX fix me: this assumes an identity mapping between IA-64 vector
209          * and Linux irq numbers...
210          */
211         spin_lock_irqsave(&iosapic_lock, flags);
212         {
213                 irq = _gsi_to_vector(gsi);
214         }
215         spin_unlock_irqrestore(&iosapic_lock, flags);
216
217         return irq;
218 }
219
220 static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
221                                                   unsigned int vec)
222 {
223         struct iosapic_rte_info *rte;
224
225         list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
226                 if (rte->gsi_base + rte->rte_index == gsi)
227                         return rte;
228         return NULL;
229 }
230
231 static void
232 set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
233 {
234         unsigned long pol, trigger, dmode;
235         u32 low32, high32;
236         char __iomem *addr;
237         int rte_index;
238         char redir;
239         struct iosapic_rte_info *rte;
240
241         DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
242
243         rte = gsi_vector_to_rte(gsi, vector);
244         if (!rte)
245                 return;         /* not an IOSAPIC interrupt */
246
247         rte_index = rte->rte_index;
248         addr    = rte->addr;
249         pol     = iosapic_intr_info[vector].polarity;
250         trigger = iosapic_intr_info[vector].trigger;
251         dmode   = iosapic_intr_info[vector].dmode;
252
253         redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
254
255 #ifdef CONFIG_SMP
256         {
257                 unsigned int irq;
258
259                 for (irq = 0; irq < NR_IRQS; ++irq)
260                         if (irq_to_vector(irq) == vector) {
261                                 set_irq_affinity_info(irq,
262                                                       (int)(dest & 0xffff),
263                                                       redir);
264                                 break;
265                         }
266         }
267 #endif
268
269         low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
270                  (trigger << IOSAPIC_TRIGGER_SHIFT) |
271                  (dmode << IOSAPIC_DELIVERY_SHIFT) |
272                  ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
273                  vector);
274
275         /* dest contains both id and eid */
276         high32 = (dest << IOSAPIC_DEST_SHIFT);
277
278         iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
279         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
280         iosapic_intr_info[vector].low32 = low32;
281         iosapic_intr_info[vector].dest = dest;
282 }
283
/* Placeholder for irq_chip hooks that need no action. */
static void
nop (unsigned int irq)
{
	/* intentionally empty */
}
289
290
#ifdef CONFIG_KEXEC
/*
 * For every RTE attached to any vector: write a masked low word for that
 * vector and then issue an EOI for it.
 */
void
kexec_disable_iosapic(void)
{
	struct iosapic_rte_info *rte;
	unsigned int idx;

	for (idx = 0; idx < IA64_NUM_VECTORS; idx++) {
		u8 vec = idx;

		list_for_each_entry(rte, &iosapic_intr_info[idx].rtes,
				    rte_list) {
			iosapic_write(rte->addr,
				      IOSAPIC_RTE_LOW(rte->rte_index),
				      IOSAPIC_MASK|vec);
			iosapic_eoi(rte->addr, vec);
		}
	}
}
#endif
310
311 static void
312 mask_irq (unsigned int irq)
313 {
314         unsigned long flags;
315         char __iomem *addr;
316         u32 low32;
317         int rte_index;
318         ia64_vector vec = irq_to_vector(irq);
319         struct iosapic_rte_info *rte;
320
321         if (list_empty(&iosapic_intr_info[vec].rtes))
322                 return;                 /* not an IOSAPIC interrupt! */
323
324         spin_lock_irqsave(&iosapic_lock, flags);
325         {
326                 /* set only the mask bit */
327                 low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
328                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
329                                     rte_list) {
330                         addr = rte->addr;
331                         rte_index = rte->rte_index;
332                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
333                 }
334         }
335         spin_unlock_irqrestore(&iosapic_lock, flags);
336 }
337
338 static void
339 unmask_irq (unsigned int irq)
340 {
341         unsigned long flags;
342         char __iomem *addr;
343         u32 low32;
344         int rte_index;
345         ia64_vector vec = irq_to_vector(irq);
346         struct iosapic_rte_info *rte;
347
348         if (list_empty(&iosapic_intr_info[vec].rtes))
349                 return;                 /* not an IOSAPIC interrupt! */
350
351         spin_lock_irqsave(&iosapic_lock, flags);
352         {
353                 low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
354                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
355                                     rte_list) {
356                         addr = rte->addr;
357                         rte_index = rte->rte_index;
358                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
359                 }
360         }
361         spin_unlock_irqrestore(&iosapic_lock, flags);
362 }
363
364
365 static void
366 iosapic_set_affinity (unsigned int irq, cpumask_t mask)
367 {
368 #ifdef CONFIG_SMP
369         unsigned long flags;
370         u32 high32, low32;
371         int dest, rte_index;
372         char __iomem *addr;
373         int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
374         ia64_vector vec;
375         struct iosapic_rte_info *rte;
376
377         irq &= (~IA64_IRQ_REDIRECTED);
378         vec = irq_to_vector(irq);
379
380         if (cpus_empty(mask))
381                 return;
382
383         dest = cpu_physical_id(first_cpu(mask));
384
385         if (list_empty(&iosapic_intr_info[vec].rtes))
386                 return;                 /* not an IOSAPIC interrupt */
387
388         set_irq_affinity_info(irq, dest, redir);
389
390         /* dest contains both id and eid */
391         high32 = dest << IOSAPIC_DEST_SHIFT;
392
393         spin_lock_irqsave(&iosapic_lock, flags);
394         {
395                 low32 = iosapic_intr_info[vec].low32 &
396                         ~(7 << IOSAPIC_DELIVERY_SHIFT);
397
398                 if (redir)
399                         /* change delivery mode to lowest priority */
400                         low32 |= (IOSAPIC_LOWEST_PRIORITY <<
401                                   IOSAPIC_DELIVERY_SHIFT);
402                 else
403                         /* change delivery mode to fixed */
404                         low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
405
406                 iosapic_intr_info[vec].low32 = low32;
407                 iosapic_intr_info[vec].dest = dest;
408                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
409                                     rte_list) {
410                         addr = rte->addr;
411                         rte_index = rte->rte_index;
412                         iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
413                                       high32);
414                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
415                 }
416         }
417         spin_unlock_irqrestore(&iosapic_lock, flags);
418 #endif
419 }
420
/*
 * Handlers for level-triggered interrupts.
 */

/* Start a level-triggered irq by unmasking it; always returns 0. */
static unsigned int
iosapic_startup_level_irq (unsigned int irq)
{
	unmask_irq(irq);

	return 0;
}
431
432 static void
433 iosapic_end_level_irq (unsigned int irq)
434 {
435         ia64_vector vec = irq_to_vector(irq);
436         struct iosapic_rte_info *rte;
437
438         move_native_irq(irq);
439         list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
440                 iosapic_eoi(rte->addr, vec);
441 }
442
443 #define iosapic_shutdown_level_irq      mask_irq
444 #define iosapic_enable_level_irq        unmask_irq
445 #define iosapic_disable_level_irq       mask_irq
446 #define iosapic_ack_level_irq           nop
447
448 struct irq_chip irq_type_iosapic_level = {
449         .name =         "IO-SAPIC-level",
450         .startup =      iosapic_startup_level_irq,
451         .shutdown =     iosapic_shutdown_level_irq,
452         .enable =       iosapic_enable_level_irq,
453         .disable =      iosapic_disable_level_irq,
454         .ack =          iosapic_ack_level_irq,
455         .end =          iosapic_end_level_irq,
456         .mask =         mask_irq,
457         .unmask =       unmask_irq,
458         .set_affinity = iosapic_set_affinity
459 };
460
/*
 * Handlers for edge-triggered interrupts.
 */

/* Start an edge-triggered irq by unmasking it; always returns 0. */
static unsigned int
iosapic_startup_edge_irq (unsigned int irq)
{
	unmask_irq(irq);

	/*
	 * IOSAPIC simply drops interrupts pended while the
	 * corresponding pin was masked, so we can't know if an
	 * interrupt is pending already.  Let's hope not...
	 */
	return 0;
}
476
477 static void
478 iosapic_ack_edge_irq (unsigned int irq)
479 {
480         irq_desc_t *idesc = irq_desc + irq;
481
482         move_native_irq(irq);
483         /*
484          * Once we have recorded IRQ_PENDING already, we can mask the
485          * interrupt for real. This prevents IRQ storms from unhandled
486          * devices.
487          */
488         if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
489             (IRQ_PENDING|IRQ_DISABLED))
490                 mask_irq(irq);
491 }
492
493 #define iosapic_enable_edge_irq         unmask_irq
494 #define iosapic_disable_edge_irq        nop
495 #define iosapic_end_edge_irq            nop
496
497 struct irq_chip irq_type_iosapic_edge = {
498         .name =         "IO-SAPIC-edge",
499         .startup =      iosapic_startup_edge_irq,
500         .shutdown =     iosapic_disable_edge_irq,
501         .enable =       iosapic_enable_edge_irq,
502         .disable =      iosapic_disable_edge_irq,
503         .ack =          iosapic_ack_edge_irq,
504         .end =          iosapic_end_edge_irq,
505         .mask =         mask_irq,
506         .unmask =       unmask_irq,
507         .set_affinity = iosapic_set_affinity
508 };
509
510 unsigned int
511 iosapic_version (char __iomem *addr)
512 {
513         /*
514          * IOSAPIC Version Register return 32 bit structure like:
515          * {
516          *      unsigned int version   : 8;
517          *      unsigned int reserved1 : 8;
518          *      unsigned int max_redir : 8;
519          *      unsigned int reserved2 : 8;
520          * }
521          */
522         return iosapic_read(addr, IOSAPIC_VERSION);
523 }
524
525 static int iosapic_find_sharable_vector (unsigned long trigger,
526                                          unsigned long pol)
527 {
528         int i, vector = -1, min_count = -1;
529         struct iosapic_intr_info *info;
530
531         /*
532          * shared vectors for edge-triggered interrupts are not
533          * supported yet
534          */
535         if (trigger == IOSAPIC_EDGE)
536                 return -1;
537
538         for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
539                 info = &iosapic_intr_info[i];
540                 if (info->trigger == trigger && info->polarity == pol &&
541                     (info->dmode == IOSAPIC_FIXED || info->dmode ==
542                      IOSAPIC_LOWEST_PRIORITY)) {
543                         if (min_count == -1 || info->count < min_count) {
544                                 vector = i;
545                                 min_count = info->count;
546                         }
547                 }
548         }
549
550         return vector;
551 }
552
553 /*
554  * if the given vector is already owned by other,
555  *  assign a new vector for the other and make the vector available
556  */
557 static void __init
558 iosapic_reassign_vector (int vector)
559 {
560         int new_vector;
561
562         if (!list_empty(&iosapic_intr_info[vector].rtes)) {
563                 new_vector = assign_irq_vector(AUTO_ASSIGN);
564                 if (new_vector < 0)
565                         panic("%s: out of interrupt vectors!\n", __FUNCTION__);
566                 printk(KERN_INFO "Reassigning vector %d to %d\n",
567                        vector, new_vector);
568                 memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
569                        sizeof(struct iosapic_intr_info));
570                 INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
571                 list_move(iosapic_intr_info[vector].rtes.next,
572                           &iosapic_intr_info[new_vector].rtes);
573                 memset(&iosapic_intr_info[vector], 0,
574                        sizeof(struct iosapic_intr_info));
575                 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
576                 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
577         }
578 }
579
580 static struct iosapic_rte_info *iosapic_alloc_rte (void)
581 {
582         int i;
583         struct iosapic_rte_info *rte;
584         int preallocated = 0;
585
586         if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
587                 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
588                                     NR_PREALLOCATE_RTE_ENTRIES);
589                 if (!rte)
590                         return NULL;
591                 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
592                         list_add(&rte->rte_list, &free_rte_list);
593         }
594
595         if (!list_empty(&free_rte_list)) {
596                 rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
597                                  rte_list);
598                 list_del(&rte->rte_list);
599                 preallocated++;
600         } else {
601                 rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
602                 if (!rte)
603                         return NULL;
604         }
605
606         memset(rte, 0, sizeof(struct iosapic_rte_info));
607         if (preallocated)
608                 rte->flags |= RTE_PREALLOCATED;
609
610         return rte;
611 }
612
613 static void iosapic_free_rte (struct iosapic_rte_info *rte)
614 {
615         if (rte->flags & RTE_PREALLOCATED)
616                 list_add_tail(&rte->rte_list, &free_rte_list);
617         else
618                 kfree(rte);
619 }
620
621 static inline int vector_is_shared (int vector)
622 {
623         return (iosapic_intr_info[vector].count > 1);
624 }
625
626 static int
627 register_intr (unsigned int gsi, int vector, unsigned char delivery,
628                unsigned long polarity, unsigned long trigger)
629 {
630         irq_desc_t *idesc;
631         struct hw_interrupt_type *irq_type;
632         int rte_index;
633         int index;
634         unsigned long gsi_base;
635         void __iomem *iosapic_address;
636         struct iosapic_rte_info *rte;
637
638         index = find_iosapic(gsi);
639         if (index < 0) {
640                 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
641                        __FUNCTION__, gsi);
642                 return -ENODEV;
643         }
644
645         iosapic_address = iosapic_lists[index].addr;
646         gsi_base = iosapic_lists[index].gsi_base;
647
648         rte = gsi_vector_to_rte(gsi, vector);
649         if (!rte) {
650                 rte = iosapic_alloc_rte();
651                 if (!rte) {
652                         printk(KERN_WARNING "%s: cannot allocate memory\n",
653                                __FUNCTION__);
654                         return -ENOMEM;
655                 }
656
657                 rte_index = gsi - gsi_base;
658                 rte->rte_index  = rte_index;
659                 rte->addr       = iosapic_address;
660                 rte->gsi_base   = gsi_base;
661                 rte->refcnt++;
662                 list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
663                 iosapic_intr_info[vector].count++;
664                 iosapic_lists[index].rtes_inuse++;
665         }
666         else if (vector_is_shared(vector)) {
667                 struct iosapic_intr_info *info = &iosapic_intr_info[vector];
668                 if (info->trigger != trigger || info->polarity != polarity) {
669                         printk (KERN_WARNING
670                                 "%s: cannot override the interrupt\n",
671                                 __FUNCTION__);
672                         return -EINVAL;
673                 }
674         }
675
676         iosapic_intr_info[vector].polarity = polarity;
677         iosapic_intr_info[vector].dmode    = delivery;
678         iosapic_intr_info[vector].trigger  = trigger;
679
680         if (trigger == IOSAPIC_EDGE)
681                 irq_type = &irq_type_iosapic_edge;
682         else
683                 irq_type = &irq_type_iosapic_level;
684
685         idesc = irq_desc + vector;
686         if (idesc->chip != irq_type) {
687                 if (idesc->chip != &no_irq_type)
688                         printk(KERN_WARNING
689                                "%s: changing vector %d from %s to %s\n",
690                                __FUNCTION__, vector,
691                                idesc->chip->name, irq_type->name);
692                 idesc->chip = irq_type;
693         }
694         return 0;
695 }
696
/*
 * Choose the physical CPU id that should receive @vector for @gsi.
 *
 * On SMP the decision is made in this order: reuse the destination of an
 * already-populated vector; use the current CPU if XTP redirection is
 * enabled or the current CPU is not yet online; use the CPEI target for the
 * CPE polling vector (CONFIG_ACPI); pick an online CPU on the IOSAPIC's NUMA
 * node (CONFIG_NUMA); otherwise round-robin over the online CPUs.  Without
 * CONFIG_SMP the current CPU is always returned.
 */
static unsigned int
get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
	static int cpu = -1;	/* last CPU handed out by the round-robin */
	extern int cpe_vector;

	/*
	 * In case of vector shared by multiple RTEs, all RTEs that
	 * share the vector need to use the same destination CPU.
	 */
	if (!list_empty(&iosapic_intr_info[vector].rtes))
		return iosapic_intr_info[vector].dest;

	/*
	 * Stay on the current CPU in two cases:
	 *  - the platform supports redirection via XTP, so let it
	 *    distribute interrupts;
	 *  - some interrupts (ACPI SCI, for instance) are registered
	 *    before the BSP is marked as online.
	 */
	if ((smp_int_redirect & SMP_IRQ_REDIRECTION) ||
	    !cpu_online(smp_processor_id()))
		return cpu_physical_id(smp_processor_id());

#ifdef CONFIG_ACPI
	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
		return get_cpei_target_cpu();
#endif

#ifdef CONFIG_NUMA
	{
		int ios_idx, online_in_node, pick, candidate, hops;
		cpumask_t node_cpus;

		ios_idx = find_iosapic(gsi);
		if (ios_idx < 0 ||
		    iosapic_lists[ios_idx].node == MAX_NUMNODES)
			goto skip_numa_setup;

		node_cpus = node_to_cpumask(iosapic_lists[ios_idx].node);

		/* drop the CPUs of that node which are not online */
		for_each_cpu_mask(candidate, node_cpus) {
			if (!cpu_online(candidate))
				cpu_clear(candidate, node_cpus);
		}

		online_in_node = cpus_weight(node_cpus);
		if (!online_in_node)
			goto skip_numa_setup;

		/* Use vector assignment to distribute across cpus in node */
		pick = vector % online_in_node;

		candidate = first_cpu(node_cpus);
		for (hops = 0; hops < pick; hops++)
			candidate = next_cpu(candidate, node_cpus);

		if (candidate != NR_CPUS)
			return cpu_physical_id(candidate);
	}
skip_numa_setup:
#endif
	/*
	 * Otherwise, round-robin interrupt vectors across all the
	 * processors.  (It'd be nice if we could be smarter in the
	 * case of NUMA.)
	 */
	do {
		cpu++;
		if (cpu >= NR_CPUS)
			cpu = 0;
	} while (!cpu_online(cpu));

	return cpu_physical_id(cpu);
#else  /* CONFIG_SMP */
	return cpu_physical_id(smp_processor_id());
#endif
}
778
779 /*
780  * ACPI can describe IOSAPIC interrupts via static tables and namespace
781  * methods.  This provides an interface to register those interrupts and
782  * program the IOSAPIC RTE.
783  */
784 int
785 iosapic_register_intr (unsigned int gsi,
786                        unsigned long polarity, unsigned long trigger)
787 {
788         int vector, mask = 1, err;
789         unsigned int dest;
790         unsigned long flags;
791         struct iosapic_rte_info *rte;
792         u32 low32;
793 again:
794         /*
795          * If this GSI has already been registered (i.e., it's a
796          * shared interrupt, or we lost a race to register it),
797          * don't touch the RTE.
798          */
799         spin_lock_irqsave(&iosapic_lock, flags);
800         {
801                 vector = gsi_to_vector(gsi);
802                 if (vector > 0) {
803                         rte = gsi_vector_to_rte(gsi, vector);
804                         rte->refcnt++;
805                         spin_unlock_irqrestore(&iosapic_lock, flags);
806                         return vector;
807                 }
808         }
809         spin_unlock_irqrestore(&iosapic_lock, flags);
810
811         /* If vector is running out, we try to find a sharable vector */
812         vector = assign_irq_vector(AUTO_ASSIGN);
813         if (vector < 0) {
814                 vector = iosapic_find_sharable_vector(trigger, polarity);
815                 if (vector < 0)
816                         return -ENOSPC;
817         }
818
819         spin_lock_irqsave(&irq_desc[vector].lock, flags);
820         spin_lock(&iosapic_lock);
821         {
822                 if (gsi_to_vector(gsi) > 0) {
823                         if (list_empty(&iosapic_intr_info[vector].rtes))
824                                 free_irq_vector(vector);
825                         spin_unlock(&iosapic_lock);
826                         spin_unlock_irqrestore(&irq_desc[vector].lock,
827                                                flags);
828                         goto again;
829                 }
830
831                 dest = get_target_cpu(gsi, vector);
832                 err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
833                               polarity, trigger);
834                 if (err < 0) {
835                         spin_unlock(&iosapic_lock);
836                         spin_unlock_irqrestore(&irq_desc[vector].lock,
837                                                flags);
838                         return err;
839                 }
840
841                 /*
842                  * If the vector is shared and already unmasked for
843                  * other interrupt sources, don't mask it.
844                  */
845                 low32 = iosapic_intr_info[vector].low32;
846                 if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
847                         mask = 0;
848                 set_rte(gsi, vector, dest, mask);
849         }
850         spin_unlock(&iosapic_lock);
851         spin_unlock_irqrestore(&irq_desc[vector].lock, flags);
852
853         printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
854                gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
855                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
856                cpu_logical_id(dest), dest, vector);
857
858         return vector;
859 }
860
861 void
862 iosapic_unregister_intr (unsigned int gsi)
863 {
864         unsigned long flags;
865         int irq, vector, index;
866         irq_desc_t *idesc;
867         u32 low32;
868         unsigned long trigger, polarity;
869         unsigned int dest;
870         struct iosapic_rte_info *rte;
871
872         /*
873          * If the irq associated with the gsi is not found,
874          * iosapic_unregister_intr() is unbalanced. We need to check
875          * this again after getting locks.
876          */
877         irq = gsi_to_irq(gsi);
878         if (irq < 0) {
879                 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
880                        gsi);
881                 WARN_ON(1);
882                 return;
883         }
884         vector = irq_to_vector(irq);
885
886         idesc = irq_desc + irq;
887         spin_lock_irqsave(&idesc->lock, flags);
888         spin_lock(&iosapic_lock);
889         {
890                 if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
891                         printk(KERN_ERR
892                                "iosapic_unregister_intr(%u) unbalanced\n",
893                                gsi);
894                         WARN_ON(1);
895                         goto out;
896                 }
897
898                 if (--rte->refcnt > 0)
899                         goto out;
900
901                 /* Mask the interrupt */
902                 low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
903                 iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
904                               low32);
905
906                 /* Remove the rte entry from the list */
907                 list_del(&rte->rte_list);
908                 iosapic_intr_info[vector].count--;
909                 iosapic_free_rte(rte);
910                 index = find_iosapic(gsi);
911                 iosapic_lists[index].rtes_inuse--;
912                 WARN_ON(iosapic_lists[index].rtes_inuse < 0);
913
914                 trigger  = iosapic_intr_info[vector].trigger;
915                 polarity = iosapic_intr_info[vector].polarity;
916                 dest     = iosapic_intr_info[vector].dest;
917                 printk(KERN_INFO
918                        "GSI %u (%s, %s) -> CPU %d (0x%04x)"
919                        " vector %d unregistered\n",
920                        gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
921                        (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
922                        cpu_logical_id(dest), dest, vector);
923
924                 if (list_empty(&iosapic_intr_info[vector].rtes)) {
925                         /* Sanity check */
926                         BUG_ON(iosapic_intr_info[vector].count);
927
928                         /* Clear the interrupt controller descriptor */
929                         idesc->chip = &no_irq_type;
930
931 #ifdef CONFIG_SMP
932                         /* Clear affinity */
933                         cpus_setall(idesc->affinity);
934 #endif
935
936                         /* Clear the interrupt information */
937                         memset(&iosapic_intr_info[vector], 0,
938                                sizeof(struct iosapic_intr_info));
939                         iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
940                         INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
941
942                         if (idesc->action) {
943                                 printk(KERN_ERR
944                                        "interrupt handlers still exist on"
945                                        "IRQ %u\n", irq);
946                                 WARN_ON(1);
947                         }
948
949                         /* Free the interrupt vector */
950                         free_irq_vector(vector);
951                 }
952         }
953  out:
954         spin_unlock(&iosapic_lock);
955         spin_unlock_irqrestore(&idesc->lock, flags);
956 }
957
958 /*
959  * ACPI calls this when it finds an entry for a platform interrupt.
960  */
961 int __init
962 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
963                                 int iosapic_vector, u16 eid, u16 id,
964                                 unsigned long polarity, unsigned long trigger)
965 {
966         static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
967         unsigned char delivery;
968         int vector, mask = 0;
969         unsigned int dest = ((id << 8) | eid) & 0xffff;
970
971         switch (int_type) {
972               case ACPI_INTERRUPT_PMI:
973                 vector = iosapic_vector;
974                 /*
975                  * since PMI vector is alloc'd by FW(ACPI) not by kernel,
976                  * we need to make sure the vector is available
977                  */
978                 iosapic_reassign_vector(vector);
979                 delivery = IOSAPIC_PMI;
980                 break;
981               case ACPI_INTERRUPT_INIT:
982                 vector = assign_irq_vector(AUTO_ASSIGN);
983                 if (vector < 0)
984                         panic("%s: out of interrupt vectors!\n", __FUNCTION__);
985                 delivery = IOSAPIC_INIT;
986                 break;
987               case ACPI_INTERRUPT_CPEI:
988                 vector = IA64_CPE_VECTOR;
989                 delivery = IOSAPIC_LOWEST_PRIORITY;
990                 mask = 1;
991                 break;
992               default:
993                 printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
994                        int_type);
995                 return -1;
996         }
997
998         register_intr(gsi, vector, delivery, polarity, trigger);
999
1000         printk(KERN_INFO
1001                "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
1002                " vector %d\n",
1003                int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
1004                int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
1005                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
1006                cpu_logical_id(dest), dest, vector);
1007
1008         set_rte(gsi, vector, dest, mask);
1009         return vector;
1010 }
1011
1012 /*
1013  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
1014  */
1015 void __init
1016 iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
1017                           unsigned long polarity,
1018                           unsigned long trigger)
1019 {
1020         int vector;
1021         unsigned int dest = cpu_physical_id(smp_processor_id());
1022
1023         vector = isa_irq_to_vector(isa_irq);
1024
1025         register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
1026
1027         DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
1028             isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
1029             polarity == IOSAPIC_POL_HIGH ? "high" : "low",
1030             cpu_logical_id(dest), dest, vector);
1031
1032         set_rte(gsi, vector, dest, 1);
1033 }
1034
1035 void __init
1036 iosapic_system_init (int system_pcat_compat)
1037 {
1038         int vector;
1039
1040         for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
1041                 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
1042                 /* mark as unused */
1043                 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
1044         }
1045
1046         pcat_compat = system_pcat_compat;
1047         if (pcat_compat) {
1048                 /*
1049                  * Disable the compatibility mode interrupts (8259 style),
1050                  * needs IN/OUT support enabled.
1051                  */
1052                 printk(KERN_INFO
1053                        "%s: Disabling PC-AT compatible 8259 interrupts\n",
1054                        __FUNCTION__);
1055                 outb(0xff, 0xA1);
1056                 outb(0xff, 0x21);
1057         }
1058 }
1059
1060 static inline int
1061 iosapic_alloc (void)
1062 {
1063         int index;
1064
1065         for (index = 0; index < NR_IOSAPICS; index++)
1066                 if (!iosapic_lists[index].addr)
1067                         return index;
1068
1069         printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
1070         return -1;
1071 }
1072
1073 static inline void
1074 iosapic_free (int index)
1075 {
1076         memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
1077 }
1078
1079 static inline int
1080 iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
1081 {
1082         int index;
1083         unsigned int gsi_end, base, end;
1084
1085         /* check gsi range */
1086         gsi_end = gsi_base + ((ver >> 16) & 0xff);
1087         for (index = 0; index < NR_IOSAPICS; index++) {
1088                 if (!iosapic_lists[index].addr)
1089                         continue;
1090
1091                 base = iosapic_lists[index].gsi_base;
1092                 end  = base + iosapic_lists[index].num_rte - 1;
1093
1094                 if (gsi_end < base || end < gsi_base)
1095                         continue; /* OK */
1096
1097                 return -EBUSY;
1098         }
1099         return 0;
1100 }
1101
1102 int __devinit
1103 iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
1104 {
1105         int num_rte, err, index;
1106         unsigned int isa_irq, ver;
1107         char __iomem *addr;
1108         unsigned long flags;
1109
1110         spin_lock_irqsave(&iosapic_lock, flags);
1111         {
1112                 addr = ioremap(phys_addr, 0);
1113                 ver = iosapic_version(addr);
1114
1115                 if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1116                         iounmap(addr);
1117                         spin_unlock_irqrestore(&iosapic_lock, flags);
1118                         return err;
1119                 }
1120
1121                 /*
1122                  * The MAX_REDIR register holds the highest input pin
1123                  * number (starting from 0).
1124                  * We add 1 so that we can use it for number of pins (= RTEs)
1125                  */
1126                 num_rte = ((ver >> 16) & 0xff) + 1;
1127
1128                 index = iosapic_alloc();
1129                 iosapic_lists[index].addr = addr;
1130                 iosapic_lists[index].gsi_base = gsi_base;
1131                 iosapic_lists[index].num_rte = num_rte;
1132 #ifdef CONFIG_NUMA
1133                 iosapic_lists[index].node = MAX_NUMNODES;
1134 #endif
1135         }
1136         spin_unlock_irqrestore(&iosapic_lock, flags);
1137
1138         if ((gsi_base == 0) && pcat_compat) {
1139                 /*
1140                  * Map the legacy ISA devices into the IOSAPIC data.  Some of
1141                  * these may get reprogrammed later on with data from the ACPI
1142                  * Interrupt Source Override table.
1143                  */
1144                 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
1145                         iosapic_override_isa_irq(isa_irq, isa_irq,
1146                                                  IOSAPIC_POL_HIGH,
1147                                                  IOSAPIC_EDGE);
1148         }
1149         return 0;
1150 }
1151
#ifdef CONFIG_HOTPLUG
/*
 * Unmap and forget the IOSAPIC that find_iosapic() reports for @gsi_base.
 *
 * Returns -EBUSY if any of its RTEs are still in use, otherwise 0.  A
 * missing IOSAPIC only produces a warning and still yields 0.
 */
int
iosapic_remove (unsigned int gsi_base)
{
	unsigned long flags;
	int ret = 0;
	int index;

	spin_lock_irqsave(&iosapic_lock, flags);

	index = find_iosapic(gsi_base);
	if (index < 0) {
		printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
		       __FUNCTION__, gsi_base);
	} else if (iosapic_lists[index].rtes_inuse) {
		ret = -EBUSY;
		printk(KERN_WARNING
		       "%s: IOSAPIC for GSI base %u is busy\n",
		       __FUNCTION__, gsi_base);
	} else {
		iounmap(iosapic_lists[index].addr);
		iosapic_free(index);
	}

	spin_unlock_irqrestore(&iosapic_lock, flags);
	return ret;
}
#endif /* CONFIG_HOTPLUG */
1184
#ifdef CONFIG_NUMA
/*
 * Record @node as the NUMA node of the IOSAPIC that find_iosapic()
 * reports for @gsi_base.  Logs a warning and changes nothing when no
 * such IOSAPIC exists.
 */
void __devinit
map_iosapic_to_node(unsigned int gsi_base, int node)
{
	int index = find_iosapic(gsi_base);

	if (index >= 0) {
		iosapic_lists[index].node = node;
		return;
	}

	printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
	       __FUNCTION__, gsi_base);
}
#endif
1201
1202 static int __init iosapic_enable_kmalloc (void)
1203 {
1204         iosapic_kmalloc_ok = 1;
1205         return 0;
1206 }
1207 core_initcall (iosapic_enable_kmalloc);