x86: add debug info for 32bit sparse_irq

[sfrench/cifs-2.6.git] / arch / x86 / kernel / io_apic_64.c
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c

index a1bec2969c6af666d9566ba9a3ba5bcef0a34ee0..30d2e38113135a3a027093bab1260fa8f3d78a18 100644 (file)
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -57,7 +57,12 @@
  
  #define __apicdebuginit(type) static type __init
  
+struct irq_cfg;
+struct irq_pin_list;
  struct irq_cfg {
+       unsigned int irq;
+       struct irq_cfg *next;
+       struct irq_pin_list *irq_2_pin;
         cpumask_t domain;
         cpumask_t old_domain;
         unsigned move_cleanup_count;
@@ -66,25 +71,163 @@ struct irq_cfg {
  };
  
  /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+static struct irq_cfg irq_cfg_legacy[] __initdata = {
+       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
  };
  
+static struct irq_cfg irq_cfg_init = { .irq =  -1U, };
+/* need to be biger than size of irq_cfg_legacy */
+static int nr_irq_cfg = 32;
+
+static int __init parse_nr_irq_cfg(char *arg)
+{
+       if (arg) {
+               nr_irq_cfg = simple_strtoul(arg, NULL, 0);
+               if (nr_irq_cfg < 32)
+                       nr_irq_cfg = 32;
+       }
+       return 0;
+}
+
+early_param("nr_irq_cfg", parse_nr_irq_cfg);
+
+static void init_one_irq_cfg(struct irq_cfg *cfg)
+{
+       memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg));
+}
+
+static struct irq_cfg *irq_cfgx;
+static struct irq_cfg *irq_cfgx_free;
+static void __init init_work(void *data)
+{
+       struct dyn_array *da = data;
+       struct irq_cfg *cfg;
+       int legacy_count;
+       int i;
+
+       cfg = *da->name;
+
+       memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy));
+
+       legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]);
+       for (i = legacy_count; i < *da->nr; i++)
+               init_one_irq_cfg(&cfg[i]);
+
+       for (i = 1; i < *da->nr; i++)
+               cfg[i-1].next = &cfg[i];
+
+       irq_cfgx_free = &irq_cfgx[legacy_count];
+       irq_cfgx[legacy_count - 1].next = NULL;
+}
+
+#define for_each_irq_cfg(cfg)          \
+       for (cfg = irq_cfgx; cfg; cfg = cfg->next)
+
+DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work);
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       struct irq_cfg *cfg;
+
+       cfg = irq_cfgx;
+       while (cfg) {
+               if (cfg->irq == irq)
+                       return cfg;
+
+               cfg = cfg->next;
+       }
+
+       return NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+       struct irq_cfg *cfg, *cfg_pri;
+       int i;
+       int count = 0;
+
+       cfg_pri = cfg = irq_cfgx;
+       while (cfg) {
+               if (cfg->irq == irq)
+                       return cfg;
+
+               cfg_pri = cfg;
+               cfg = cfg->next;
+               count++;
+       }
+
+       if (!irq_cfgx_free) {
+               unsigned long phys;
+               unsigned long total_bytes;
+               /*
+                *  we run out of pre-allocate ones, allocate more
+                */
+               printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg);
+
+               total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg;
+               if (after_bootmem)
+                       cfg = kzalloc(total_bytes, GFP_ATOMIC);
+               else
+                       cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0);
+
+               if (!cfg)
+                       panic("please boot with nr_irq_cfg= %d\n", count * 2);
+
+               phys = __pa(cfg);
+               printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes);
+
+               for (i = 0; i < nr_irq_cfg; i++)
+                       init_one_irq_cfg(&cfg[i]);
+
+               for (i = 1; i < nr_irq_cfg; i++)
+                       cfg[i-1].next = &cfg[i];
+
+               irq_cfgx_free = cfg;
+       }
+
+       cfg = irq_cfgx_free;
+       irq_cfgx_free = irq_cfgx_free->next;
+       cfg->next = NULL;
+       if (cfg_pri)
+               cfg_pri->next = cfg;
+       else
+               irq_cfgx = cfg;
+       cfg->irq = irq;
+       printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq);
+#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG
+       {
+               /* dump the results */
+               struct irq_cfg *cfg;
+               unsigned long phys;
+               unsigned long bytes = sizeof(struct irq_cfg);
+
+               printk(KERN_DEBUG "=========================== %d\n", irq);
+               printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq);
+               for_each_irq_cfg(cfg) {
+                       phys = __pa(cfg);
+                       printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes);
+               }
+               printk(KERN_DEBUG "===========================\n");
+       }
+#endif
+       return cfg;
+}
+
  static int assign_irq_vector(int irq, cpumask_t mask);
  
  int first_system_vector = 0xfe;
@@ -129,8 +272,8 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
   * Rough estimation of how many shared IRQs there are, can
   * be changed anytime.
   */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+int pin_map_size;
  
  /*
   * This is performance-critical, we want to do it O(1)
@@ -139,9 +282,66 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
   * between pins and IRQs.
   */
  
-static struct irq_pin_list {
-       short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+};
+
+static struct irq_pin_list *irq_2_pin_head;
+/* fill one page ? */
+static int nr_irq_2_pin = 0x100;
+static struct irq_pin_list *irq_2_pin_ptr;
+static void __init irq_2_pin_init_work(void *data)
+{
+       struct dyn_array *da = data;
+       struct irq_pin_list *pin;
+       int i;
+
+       pin = *da->name;
+
+       for (i = 1; i < *da->nr; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = &pin[0];
+}
+DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work);
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+       struct irq_pin_list *pin;
+       int i;
+
+       pin = irq_2_pin_ptr;
+
+       if (pin) {
+               irq_2_pin_ptr = pin->next;
+               pin->next = NULL;
+               return pin;
+       }
+
+       /*
+        *  we run out of pre-allocate ones, allocate more
+        */
+       printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin);
+
+       if (after_bootmem)
+               pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin,
+                                GFP_ATOMIC);
+       else
+               pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) *
+                               nr_irq_2_pin, PAGE_SIZE, 0);
+
+       if (!pin)
+               panic("can not get more irq_2_pin\n");
+
+       for (i = 1; i < nr_irq_2_pin; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = pin->next;
+       pin->next = NULL;
+
+       return pin;
+}
  
  struct io_apic {
         unsigned int index;
@@ -183,16 +383,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
  {
         struct irq_pin_list *entry;
         unsigned long flags;
+       struct irq_cfg *cfg = irq_cfg(irq);
  
         spin_lock_irqsave(&ioapic_lock, flags);
-       entry = irq_2_pin + irq;
+       entry = cfg->irq_2_pin;
         for (;;) {
                 unsigned int reg;
                 int pin;
  
-               pin = entry->pin;
-               if (pin == -1)
+               if (!entry)
                         break;
+               pin = entry->pin;
                 reg = io_apic_read(entry->apic, 0x10 + pin*2);
                 /* Is the remote IRR bit set? */
                 if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,7 +402,7 @@ static bool io_apic_level_ack_pending(unsigned int irq)
                 }
                 if (!entry->next)
                         break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
         }
         spin_unlock_irqrestore(&ioapic_lock, flags);
  
@@ -222,21 +423,23 @@ static inline void io_apic_sync(unsigned int apic)
                                                                         \
  {                                                                      \
         int pin;                                                        \
-       struct irq_pin_list *entry = irq_2_pin + irq;                   \
+       struct irq_cfg *cfg;                                            \
+       struct irq_pin_list *entry;                                     \
                                                                         \
-       BUG_ON(irq >= NR_IRQS);                                         \
+       cfg = irq_cfg(irq);                                             \
+       entry = cfg->irq_2_pin;                                         \
         for (;;) {                                                      \
                 unsigned int reg;                                       \
-               pin = entry->pin;                                       \
-               if (pin == -1)                                          \
+               if (!entry)                                             \
                         break;                                          \
+               pin = entry->pin;                                       \
                 reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
                 reg ACTION;                                             \
                 io_apic_modify(entry->apic, reg);                       \
                 FINAL;                                                  \
                 if (!entry->next)                                       \
                         break;                                          \
-               entry = irq_2_pin + entry->next;                        \
+               entry = entry->next;                                    \
         }                                                               \
  }
  
@@ -299,15 +502,19 @@ static void ioapic_mask_entry(int apic, int pin)
  static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
  {
         int apic, pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
  
-       BUG_ON(irq >= NR_IRQS);
+       cfg = irq_cfg(irq);
+       entry = cfg->irq_2_pin;
         for (;;) {
                 unsigned int reg;
+
+               if (!entry)
+                       break;
+
                 apic = entry->apic;
                 pin = entry->pin;
-               if (pin == -1)
-                       break;
                 /*
                  * With interrupt-remapping, destination information comes
                  * from interrupt-remapping table entry.
@@ -320,16 +527,17 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
                 io_apic_modify(apic, reg);
                 if (!entry->next)
                         break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
         }
  }
  
  static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
         unsigned long flags;
         unsigned int dest;
         cpumask_t tmp;
+       struct irq_desc *desc;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
@@ -346,9 +554,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
          */
         dest = SET_APIC_LOGICAL_ID(dest);
  
+       desc = irq_to_desc(irq);
         spin_lock_irqsave(&ioapic_lock, flags);
         __target_IO_APIC_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
         spin_unlock_irqrestore(&ioapic_lock, flags);
  }
  #endif
@@ -358,23 +567,37 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
   * shared ISA-space IRQs, so we have to support them. We are super
   * fast in the common case, and fast for shared ISA-space IRQs.
   */
+int first_free_entry;
  static void add_pin_to_irq(unsigned int irq, int apic, int pin)
  {
-       static int first_free_entry = NR_IRQS;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
+
+       /* first time to refer irq_cfg, so with new */
+       cfg = irq_cfg_alloc(irq);
+       entry = cfg->irq_2_pin;
+       if (!entry) {
+               entry = get_one_free_irq_2_pin();
+               cfg->irq_2_pin = entry;
+               entry->apic = apic;
+               entry->pin = pin;
+               printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
+               return;
+       }
  
-       BUG_ON(irq >= NR_IRQS);
-       while (entry->next)
-               entry = irq_2_pin + entry->next;
+       while (entry->next) {
+               /* not again, please */
+               if (entry->apic == apic && entry->pin == pin)
+                       return;
  
-       if (entry->pin != -1) {
-               entry->next = first_free_entry;
-               entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: ran out of irq_2_pin entries!");
+               entry = entry->next;
         }
+
+       entry->next = get_one_free_irq_2_pin();
+       entry = entry->next;
         entry->apic = apic;
         entry->pin = pin;
+       printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin);
  }
  
  /*
@@ -384,17 +607,24 @@ static void __init replace_pin_at_irq(unsigned int irq,
                                       int oldapic, int oldpin,
                                       int newapic, int newpin)
  {
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_pin_list *entry = cfg->irq_2_pin;
+       int replaced = 0;
  
-       while (1) {
+       while (entry) {
                 if (entry->apic == oldapic && entry->pin == oldpin) {
                         entry->apic = newapic;
                         entry->pin = newpin;
-               }
-               if (!entry->next)
+                       replaced = 1;
+                       /* every one is different, right? */
                         break;
-               entry = irq_2_pin + entry->next;
+               }
+               entry = entry->next;
         }
+
+       /* why? call replace before add? */
+       if (!replaced)
+               add_pin_to_irq(irq, newapic, newpin);
  }
  
  
@@ -634,7 +864,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                 best_guess = irq;
                 }
         }
-       BUG_ON(best_guess >= NR_IRQS);
         return best_guess;
  }
  
@@ -766,7 +995,6 @@ static int pin_2_irq(int idx, int apic, int pin)
                         irq += nr_ioapic_registers[i++];
                 irq += pin;
         }
-       BUG_ON(irq >= NR_IRQS);
         return irq;
  }
  
@@ -801,8 +1029,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
         int cpu;
         struct irq_cfg *cfg;
  
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
  
         /* Only try and allocate irqs on cpus that are present */
         cpus_and(mask, mask, cpu_online_map);
@@ -875,8 +1102,7 @@ static void __clear_irq_vector(int irq)
         cpumask_t mask;
         int cpu, vector;
  
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
         BUG_ON(!cfg->vector);
  
         vector = cfg->vector;
@@ -893,12 +1119,14 @@ void __setup_vector_irq(int cpu)
         /* Initialize vector_irq on a new cpu */
         /* This function must be called with vector_lock held */
         int irq, vector;
+       struct irq_cfg *cfg;
  
         /* Mark the inuse vectors */
-       for (irq = 0; irq < NR_IRQS; ++irq) {
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+       for_each_irq_cfg(cfg) {
+               if (!cpu_isset(cpu, cfg->domain))
                         continue;
-               vector = irq_cfg[irq].vector;
+               vector = cfg->vector;
+               irq = cfg->irq;
                 per_cpu(vector_irq, cpu)[vector] = irq;
         }
         /* Mark the free vectors */
@@ -906,7 +1134,9 @@ void __setup_vector_irq(int cpu)
                 irq = per_cpu(vector_irq, cpu)[vector];
                 if (irq < 0)
                         continue;
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+               cfg = irq_cfg(irq);
+               if (!cpu_isset(cpu, cfg->domain))
                         per_cpu(vector_irq, cpu)[vector] = -1;
         }
  }
@@ -918,14 +1148,22 @@ static struct irq_chip ir_ioapic_chip;
  
  static void ioapic_register_intr(int irq, unsigned long trigger)
  {
+       struct irq_desc *desc;
+
+       /* first time to use this irq_desc */
+       if (irq < 16)
+               desc = irq_to_desc(irq);
+       else
+               desc = irq_to_desc_alloc(irq);
+
         if (trigger)
-               irq_desc[irq].status |= IRQ_LEVEL;
+               desc->status |= IRQ_LEVEL;
         else
-               irq_desc[irq].status &= ~IRQ_LEVEL;
+               desc->status &= ~IRQ_LEVEL;
  
  #ifdef CONFIG_INTR_REMAP
         if (irq_remapped(irq)) {
-               irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+               desc->status |= IRQ_MOVE_PCNTXT;
                 if (trigger)
                         set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
                                                       handle_fasteoi_irq,
@@ -1009,13 +1247,15 @@ static int setup_ioapic_entry(int apic, int irq,
  static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                               int trigger, int polarity)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         struct IO_APIC_route_entry entry;
         cpumask_t mask;
  
         if (!IO_APIC_IRQ(irq))
                 return;
  
+       cfg = irq_cfg(irq);
+
         mask = TARGET_CPUS;
         if (assign_irq_vector(irq, mask))
                 return;
@@ -1125,6 +1365,7 @@ __apicdebuginit(void) print_IO_APIC(void)
         union IO_APIC_reg_01 reg_01;
         union IO_APIC_reg_02 reg_02;
         unsigned long flags;
+       struct irq_cfg *cfg;
  
         if (apic_verbosity == APIC_QUIET)
                 return;
@@ -1153,6 +1394,8 @@ __apicdebuginit(void) print_IO_APIC(void)
         printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
         printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
         printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
  
         printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
         printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1193,16 +1436,16 @@ __apicdebuginit(void) print_IO_APIC(void)
         }
         }
         printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_pin_list *entry = irq_2_pin + i;
-               if (entry->pin < 0)
+       for_each_irq_cfg(cfg) {
+               struct irq_pin_list *entry = cfg->irq_2_pin;
+               if (!entry)
                         continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
+               printk(KERN_DEBUG "IRQ%d ", cfg->irq);
                 for (;;) {
                         printk("-> %d:%d", entry->apic, entry->pin);
                         if (!entry->next)
                                 break;
-                       entry = irq_2_pin + entry->next;
+                       entry = entry->next;
                 }
                 printk("\n");
         }
@@ -1281,8 +1524,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
         printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
  
         icr = apic_icr_read();
-       printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-       printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
  
         v = apic_read(APIC_LVTT);
         printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1363,14 +1606,9 @@ void __init enable_IO_APIC(void)
  {
         union IO_APIC_reg_01 reg_01;
         int i8259_apic, i8259_pin;
-       int i, apic;
+       int apic;
         unsigned long flags;
  
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
-               irq_2_pin[i].pin = -1;
-               irq_2_pin[i].next = 0;
-       }
-
         /*
          * The number of IO-APIC IRQ registers (== #pins):
          */
@@ -1533,7 +1771,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
  
  static int ioapic_retrigger_irq(unsigned int irq)
  {
-       struct irq_cfg *cfg = &irq_cfg[irq];
+       struct irq_cfg *cfg = irq_cfg(irq);
         unsigned long flags;
  
         spin_lock_irqsave(&vector_lock, flags);
@@ -1580,11 +1818,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
   */
  static void migrate_ioapic_irq(int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
         cpumask_t tmp, cleanup_mask;
         struct irte irte;
-       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       int modify_ioapic_rte;
         unsigned int dest;
         unsigned long flags;
  
@@ -1598,9 +1836,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
         if (assign_irq_vector(irq, mask))
                 return;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
+       desc = irq_to_desc(irq);
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
         if (modify_ioapic_rte) {
                 spin_lock_irqsave(&ioapic_lock, flags);
                 __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,12 +1863,13 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
                 cfg->move_in_progress = 0;
         }
  
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
  }
  
  static int migrate_irq_remapped_level(int irq)
  {
         int ret = -1;
+       struct irq_desc *desc = irq_to_desc(irq);
  
         mask_IO_APIC_irq(irq);
  
@@ -1643,11 +1885,11 @@ static int migrate_irq_remapped_level(int irq)
         }
  
         /* everthing is clear. we have right of way */
-       migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+       migrate_ioapic_irq(irq, desc->pending_mask);
  
         ret = 0;
-       irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-       cpus_clear(irq_desc[irq].pending_mask);
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(desc->pending_mask);
  
  unmask:
         unmask_IO_APIC_irq(irq);
@@ -1656,10 +1898,10 @@ unmask:
  
  static void ir_irq_migration(struct work_struct *work)
  {
-       int irq;
+       unsigned int irq;
+       struct irq_desc *desc;
  
-       for (irq = 0; irq < NR_IRQS; irq++) {
-               struct irq_desc *desc = irq_desc + irq;
+       for_each_irq_desc(irq, desc) {
                 if (desc->status & IRQ_MOVE_PENDING) {
                         unsigned long flags;
  
@@ -1671,8 +1913,7 @@ static void ir_irq_migration(struct work_struct *work)
                                 continue;
                         }
  
-                       desc->chip->set_affinity(irq,
-                                                irq_desc[irq].pending_mask);
+                       desc->chip->set_affinity(irq, desc->pending_mask);
                         spin_unlock_irqrestore(&desc->lock, flags);
                 }
         }
@@ -1683,9 +1924,11 @@ static void ir_irq_migration(struct work_struct *work)
   */
  static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  {
-       if (irq_desc[irq].status & IRQ_LEVEL) {
-               irq_desc[irq].status |= IRQ_MOVE_PENDING;
-               irq_desc[irq].pending_mask = mask;
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc->status & IRQ_LEVEL) {
+               desc->status |= IRQ_MOVE_PENDING;
+               desc->pending_mask = mask;
                 migrate_irq_remapped_level(irq);
                 return;
         }
@@ -1707,11 +1950,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                 struct irq_desc *desc;
                 struct irq_cfg *cfg;
                 irq = __get_cpu_var(vector_irq)[vector];
-               if (irq >= NR_IRQS)
+
+               desc = irq_to_desc(irq);
+               if (!desc)
                         continue;
  
-               desc = irq_desc + irq;
-               cfg = irq_cfg + irq;
+               cfg = irq_cfg(irq);
                 spin_lock(&desc->lock);
                 if (!cfg->move_cleanup_count)
                         goto unlock;
@@ -1730,7 +1974,7 @@ unlock:
  
  static void irq_complete_move(unsigned int irq)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
         unsigned vector, me;
  
         if (likely(!cfg->move_in_progress))
@@ -1776,7 +2020,7 @@ static void ack_apic_level(unsigned int irq)
         irq_complete_move(irq);
  #ifdef CONFIG_GENERIC_PENDING_IRQ
         /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
                 do_unmask_irq = 1;
                 mask_IO_APIC_irq(irq);
         }
@@ -1853,6 +2097,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
  static inline void init_IO_APIC_traps(void)
  {
         int irq;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
  
         /*
          * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2111,9 @@ static inline void init_IO_APIC_traps(void)
          * Also, we've got to be careful not to trash gate
          * 0x80, because int 0x80 is hm, kind of importantish. ;)
          */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+       for_each_irq_cfg(cfg) {
+               irq = cfg->irq;
+               if (IO_APIC_IRQ(irq) && !cfg->vector) {
                         /*
                          * Hmm.. We don't have an entry for this,
                          * so default to an old-fashioned 8259
@@ -1874,9 +2121,11 @@ static inline void init_IO_APIC_traps(void)
                          */
                         if (irq < 16)
                                 make_8259A_irq(irq);
-                       else
+                       else {
+                               desc = irq_to_desc(irq);
                                 /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
+                               desc->chip = &no_irq_chip;
+                       }
                 }
         }
  }
@@ -1911,7 +2160,10 @@ static struct irq_chip lapic_chip __read_mostly = {
  
  static void lapic_register_intr(int irq)
  {
-       irq_desc[irq].status &= ~IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       desc->status &= ~IRQ_LEVEL;
         set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                       "edge");
  }
@@ -1998,7 +2250,7 @@ static inline void __init unlock_ExtINT_logic(void)
   */
  static inline void __init check_timer(void)
  {
-       struct irq_cfg *cfg = irq_cfg + 0;
+       struct irq_cfg *cfg = irq_cfg(0);
         int apic1, pin1, apic2, pin2;
         unsigned long flags;
         int no_pin1 = 0;
@@ -2270,32 +2522,53 @@ device_initcall(ioapic_init_sysfs);
  /*
   * Dynamic irq allocate and deallocation
   */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
  {
         /* Allocate an unused irq */
-       int irq;
-       int new;
+       unsigned int irq;
+       unsigned int new;
         unsigned long flags;
+       struct irq_cfg *cfg_new;
  
-       irq = -ENOSPC;
+#ifndef CONFIG_HAVE_SPARSE_IRQ
+       irq_want = nr_irqs - 1;
+#endif
+
+       irq = 0;
         spin_lock_irqsave(&vector_lock, flags);
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
+       for (new = irq_want; new > 0; new--) {
                 if (platform_legacy_irq(new))
                         continue;
-               if (irq_cfg[new].vector != 0)
+               cfg_new = irq_cfg(new);
+               if (cfg_new && cfg_new->vector != 0)
                         continue;
+               /* check if need to create one */
+               if (!cfg_new)
+                       cfg_new = irq_cfg_alloc(new);
                 if (__assign_irq_vector(new, TARGET_CPUS) == 0)
                         irq = new;
                 break;
         }
         spin_unlock_irqrestore(&vector_lock, flags);
  
-       if (irq >= 0) {
+       if (irq > 0) {
                 dynamic_irq_init(irq);
         }
         return irq;
  }
  
+int create_irq(void)
+{
+       int irq;
+
+       irq = create_irq_nr(nr_irqs - 1);
+
+       if (irq == 0)
+               irq = -1;
+
+       return irq;
+}
+
  void destroy_irq(unsigned int irq)
  {
         unsigned long flags;
@@ -2316,7 +2589,7 @@ void destroy_irq(unsigned int irq)
  #ifdef CONFIG_PCI_MSI
  static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         int err;
         unsigned dest;
         cpumask_t tmp;
@@ -2326,6 +2599,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
         if (err)
                 return err;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, tmp);
         dest = cpu_mask_to_apicid(tmp);
  
@@ -2383,10 +2657,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
  #ifdef CONFIG_SMP
  static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
         cpumask_t tmp;
+       struct irq_desc *desc;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
@@ -2395,6 +2670,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
         if (assign_irq_vector(irq, mask))
                 return;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
@@ -2406,7 +2682,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
         write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
  }
  
  #ifdef CONFIG_INTR_REMAP
@@ -2416,10 +2693,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
   */
  static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         unsigned int dest;
         cpumask_t tmp, cleanup_mask;
         struct irte irte;
+       struct irq_desc *desc;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
@@ -2431,6 +2709,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
         if (assign_irq_vector(irq, mask))
                 return;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
@@ -2454,7 +2733,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
                 cfg->move_in_progress = 0;
         }
  
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
  }
  #endif
  #endif /* CONFIG_SMP */
@@ -2528,7 +2808,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
  
  #ifdef CONFIG_INTR_REMAP
         if (irq_remapped(irq)) {
-               struct irq_desc *desc = irq_desc + irq;
+               struct irq_desc *desc = irq_to_desc(irq);
                 /*
                  * irq migration in process context
                  */
@@ -2541,13 +2821,29 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
         return 0;
  }
  
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+       unsigned int irq;
+
+       irq = dev->bus->number;
+       irq <<= 8;
+       irq |= dev->devfn;
+       irq <<= 12;
+
+       return irq;
+}
+
  int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
  {
-       int irq, ret;
+       unsigned int irq;
+       int ret;
+       unsigned int irq_want;
+
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
  
-       irq = create_irq();
-       if (irq < 0)
-               return irq;
+       irq = create_irq_nr(irq_want);
+       if (irq == 0)
+               return -1;
  
  #ifdef CONFIG_INTR_REMAP
         if (!intr_remapping_enabled)
@@ -2574,18 +2870,22 @@ error:
  
  int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
  {
-       int irq, ret, sub_handle;
+       unsigned int irq;
+       int ret, sub_handle;
         struct msi_desc *desc;
+       unsigned int irq_want;
+
  #ifdef CONFIG_INTR_REMAP
         struct intel_iommu *iommu = 0;
         int index = 0;
  #endif
  
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
         sub_handle = 0;
         list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq();
-               if (irq < 0)
-                       return irq;
+               irq = create_irq_nr(irq_want--);
+               if (irq == 0)
+                       return -1;
  #ifdef CONFIG_INTR_REMAP
                 if (!intr_remapping_enabled)
                         goto no_ir;
@@ -2636,10 +2936,11 @@ void arch_teardown_msi_irq(unsigned int irq)
  #ifdef CONFIG_SMP
  static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         struct msi_msg msg;
         unsigned int dest;
         cpumask_t tmp;
+       struct irq_desc *desc;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
@@ -2648,6 +2949,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
         if (assign_irq_vector(irq, mask))
                 return;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
@@ -2659,7 +2961,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
         msg.address_lo |= MSI_ADDR_DEST_ID(dest);
  
         dmar_msi_write(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
  }
  #endif /* CONFIG_SMP */
  
@@ -2713,9 +3016,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
  
  static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         unsigned int dest;
         cpumask_t tmp;
+       struct irq_desc *desc;
  
         cpus_and(tmp, mask, cpu_online_map);
         if (cpus_empty(tmp))
@@ -2724,11 +3028,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
         if (assign_irq_vector(irq, mask))
                 return;
  
+       cfg = irq_cfg(irq);
         cpus_and(tmp, cfg->domain, mask);
         dest = cpu_mask_to_apicid(tmp);
  
         target_ht_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
  }
  #endif
  
@@ -2745,7 +3051,7 @@ static struct irq_chip ht_irq_chip = {
  
  int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
  {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
         int err;
         cpumask_t tmp;
  
@@ -2755,6 +3061,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                 struct ht_irq_msg msg;
                 unsigned dest;
  
+               cfg = irq_cfg(irq);
                 cpus_and(tmp, cfg->domain, tmp);
                 dest = cpu_mask_to_apicid(tmp);
  
@@ -2853,6 +3160,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
  void __init setup_ioapic_dest(void)
  {
         int pin, ioapic, irq, irq_entry;
+       struct irq_cfg *cfg;
  
         if (skip_ioapic_setup == 1)
                 return;
@@ -2868,7 +3176,8 @@ void __init setup_ioapic_dest(void)
                          * when you have too many devices, because at that time only boot
                          * cpu is online.
                          */
-                       if (!irq_cfg[irq].vector)
+                       cfg = irq_cfg(irq);
+                       if (!cfg->vector)
                                 setup_IO_APIC_irq(ioapic, pin, irq,
                                                   irq_trigger(irq_entry),
                                                   irq_polarity(irq_entry));