x86/apic: Support 15 bits of APIC ID in MSI where available
author: David Woodhouse <dwmw@amazon.co.uk>
Sat, 24 Oct 2020 21:35:32 +0000 (22:35 +0100)
committer: Thomas Gleixner <tglx@linutronix.de>
Wed, 28 Oct 2020 19:26:29 +0000 (20:26 +0100)
Some hypervisors can allow the guest to use the Extended Destination ID
field in the MSI address to address up to 32768 CPUs.

This applies to all downstream devices which generate MSI cycles,
including HPET, I/O-APIC and PCI MSI.

HPET and PCI MSI use the same __irq_msi_compose_msg() function, while
I/O-APIC generates its own and had support for the extended bits added in
a previous commit.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20201024213535.443185-33-dwmw2@infradead.org
arch/x86/include/asm/msi.h
arch/x86/include/asm/x86_init.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/x86_init.c

index 322fd905da9c912dd833f8b94d933344cfe5ed2e..b85147d75626e366422b7ed9d290e19ff19c3a4e 100644 (file)
@@ -29,7 +29,8 @@ typedef struct x86_msi_addr_lo {
                        u32     reserved_0              :  2,
                                dest_mode_logical       :  1,
                                redirect_hint           :  1,
-                               reserved_1              :  8,
+                               reserved_1              :  1,
+                               virt_destid_8_14        :  7,
                                destid_0_7              :  8,
                                base_address            : 12;
                };
index dde5b3f1e7cd56367d0382bd1f591fd940ee70d5..5c69f7eb5d47e81e9b4d4f14a30041fe0465d9ab 100644 (file)
@@ -116,6 +116,7 @@ struct x86_init_pci {
  * @init_platform:             platform setup
  * @guest_late_init:           guest late init
  * @x2apic_available:          X2APIC detection
+ * @msi_ext_dest_id:           MSI supports 15-bit APIC IDs
  * @init_mem_mapping:          setup early mappings during init_mem_mapping()
  * @init_after_bootmem:                guest init after boot allocator is finished
  */
@@ -123,6 +124,7 @@ struct x86_hyper_init {
        void (*init_platform)(void);
        void (*guest_late_init)(void);
        bool (*x2apic_available)(void);
+       bool (*msi_ext_dest_id)(void);
        void (*init_mem_mapping)(void);
        void (*init_after_bootmem)(void);
 };
index f7196ee0f005e674401053e073dc15b52a9cea1f..6bd20c0de8bc605c9f9d82a1c76060e71b13d3fa 100644 (file)
@@ -93,6 +93,11 @@ static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
  */
 static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
 
+/*
+ * Hypervisor supports 15 bits of APIC ID in MSI Extended Destination ID
+ */
+static bool virt_ext_dest_id __ro_after_init;
+
 /*
  * Map cpu index to physical APIC ID
  */
@@ -1841,6 +1846,8 @@ static __init void try_to_enable_x2apic(int remap_mode)
                return;
 
        if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
+               u32 apic_limit = 255;
+
                /*
                 * Using X2APIC without IR is not architecturally supported
                 * on bare metal but may be supported in guests.
@@ -1851,12 +1858,22 @@ static __init void try_to_enable_x2apic(int remap_mode)
                        return;
                }
 
+               /*
+                * If the hypervisor supports extended destination ID in
+                * MSI, that increases the maximum APIC ID that can be
+                * used for non-remapped IRQ domains.
+                */
+               if (x86_init.hyper.msi_ext_dest_id()) {
+                       virt_ext_dest_id = 1;
+                       apic_limit = 32767;
+               }
+
                /*
                 * Without IR, all CPUs can be addressed by IOAPIC/MSI only
                 * in physical mode, and CPUs with an APIC ID that cannot
                 * be addressed must not be brought online.
                 */
-               x2apic_set_max_apicid(255);
+               x2apic_set_max_apicid(apic_limit);
                x2apic_phys = 1;
        }
        x2apic_enable();
@@ -2497,10 +2514,15 @@ void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg,
         * Only the IOMMU itself can use the trick of putting destination
         * APIC ID into the high bits of the address. Anything else would
         * just be writing to memory if it tried that, and needs IR to
-        * address higher APIC IDs.
+        * address APICs which can't be addressed in the normal 32-bit
+        * address range at 0xFFExxxxx. That is typically just 8 bits, but
+        * some hypervisors allow the extended destination ID field in bits
+        * 5-11 to be used, giving support for 15 bits of APIC IDs in total.
         */
        if (dmar)
                msg->arch_addr_hi.destid_8_31 = cfg->dest_apicid >> 8;
+       else if (virt_ext_dest_id && cfg->dest_apicid < 0x8000)
+               msg->arch_addr_lo.virt_destid_8_14 = cfg->dest_apicid >> 8;
        else
                WARN_ON_ONCE(cfg->dest_apicid > 0xFF);
 }
index a3038d8deb6a4c98a00b05284a119e061b7985e0..8b395821cb8d022ba7a532f43dfcf01361fe12bf 100644 (file)
@@ -110,6 +110,7 @@ struct x86_init_ops x86_init __initdata = {
                .init_platform          = x86_init_noop,
                .guest_late_init        = x86_init_noop,
                .x2apic_available       = bool_x86_init_noop,
+               .msi_ext_dest_id        = bool_x86_init_noop,
                .init_mem_mapping       = x86_init_noop,
                .init_after_bootmem     = x86_init_noop,
        },