Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 18:22:57 +0000 (11:22 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Jul 2019 18:22:57 +0000 (11:22 -0700)
Pull x86 apic updates from Thomas Gleixner:
 "Updates for the x86 APIC interrupt handling and APIC timer:

   - Fix a long-standing issue with spurious interrupts which was caused
     by the big vector management rework a few years ago. Robert Hodaszi
     finally provided enough debug data and an excellent initial failure
     analysis, which made it possible to understand the underlying
     issues.

     This contains a change to the core interrupt management code which
     is required to handle this correctly for the APIC/IO_APIC. The core
     changes are NOOPs for all architectures except ARM64, and Marc
     Zyngier confirmed that ARM64 is not adversely impacted by them (the
     reordered shutdown sequence is sketched below).

   - Newer systems allow the PIT clock to be disabled for power saving,
     which causes a panic in the timer interrupt delivery check of the
     IO/APIC when the HPET timer is not enabled either. While the clock
     could be turned back on, this would cause an endless whack-a-mole
     game of chasing the proper register in each affected chipset.

     These systems provide the relevant frequencies for the TSC, the CPU
     and the local APIC timer via CPUID and/or MSRs, which makes it
     possible to avoid the PIT/HPET based calibration. As the
     calibration code is the only user of the legacy timers on modern
     systems and is skipped anyway when the frequencies are already
     known, there is no point in setting up the PIT and actually
     checking for interrupt delivery via the IO/APIC (see the PIT sketch
     below).

     To achieve this on a wide variety of platforms, the CPUID/MSR based
     frequency readout has been made more robust, which also made it
     possible to remove quite a few workarounds that turned out to be no
     longer required. Thanks to Daniel Drake for analysis, patches and
     verification (a worked example of the CPUID based readout follows
     below)"

* 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/irq: Seperate unused system vectors from spurious entry again
  x86/irq: Handle spurious interrupt after shutdown gracefully
  x86/ioapic: Implement irq_get_irqchip_state() callback
  genirq: Add optional hardware synchronization for shutdown
  genirq: Fix misleading synchronize_irq() documentation
  genirq: Delay deactivation in free_irq()
  x86/timer: Skip PIT initialization on modern chipsets
  x86/apic: Use non-atomic operations when possible
  x86/apic: Make apic_bsp_setup() static
  x86/tsc: Set LAPIC timer period to crystal clock frequency
  x86/apic: Rename 'lapic_timer_frequency' to 'lapic_timer_period'
  x86/tsc: Use CPUID.0x16 to calculate missing crystal frequency

13 files changed:
arch/x86/entry/entry_64.S
arch/x86/include/asm/apic.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_flat_64.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/idt.c
arch/x86/kernel/irq.c
arch/x86/kernel/smp.c
arch/x86/kernel/tsc.c
kernel/irq/chip.c
kernel/irq/internals.h
kernel/irq/manage.c

index 11aa3b2afa4d8e2b3a7d9dc619270d1d8ae15a3d,6d835991bb23b3ca91346088053d4ea34a829a06..8dbca86c249b8e108ac30be6121a024ed63e74d4
@@@ -375,6 -375,18 +375,18 @@@ ENTRY(irq_entries_start
      .endr
  END(irq_entries_start)
  
+       .align 8
+ ENTRY(spurious_entries_start)
+     vector=FIRST_SYSTEM_VECTOR
+     .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+       UNWIND_HINT_IRET_REGS
+       pushq   $(~vector+0x80)                 /* Note: always in signed byte range */
+       jmp     common_spurious
+       .align  8
+       vector=vector+1
+     .endr
+ END(spurious_entries_start)
  .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
  #ifdef CONFIG_DEBUG_ENTRY
        pushq %rax
@@@ -571,10 -583,20 +583,20 @@@ _ASM_NOKPROBE(interrupt_entry
  
  /* Interrupt entry/exit. */
  
-       /*
-        * The interrupt stubs push (~vector+0x80) onto the stack and
-        * then jump to common_interrupt.
-        */
+ /*
+  * The interrupt stubs push (~vector+0x80) onto the stack and
+  * then jump to common_spurious/interrupt.
+  */
+ common_spurious:
+       addq    $-0x80, (%rsp)                  /* Adjust vector to [-256, -1] range */
+       call    interrupt_entry
+       UNWIND_HINT_REGS indirect=1
+       call    smp_spurious_interrupt          /* rdi points to pt_regs */
+       jmp     ret_from_intr
+ END(common_spurious)
+ _ASM_NOKPROBE(common_spurious)
+ /* common_interrupt is a hotpath. Align it */
        .p2align CONFIG_X86_L1_CACHE_SHIFT
  common_interrupt:
        addq    $-0x80, (%rsp)                  /* Adjust vector to [-256, -1] range */
@@@ -878,7 -900,7 +900,7 @@@ apicinterrupt IRQ_WORK_VECTOR                      irq_wor
   * @paranoid == 2 is special: the stub will never switch stacks.  This is for
   * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
   */
 -.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
 +.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
  ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
  
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
  
 +      .if \create_gap == 1
 +      /*
 +       * If coming from kernel space, create a 6-word gap to allow the
 +       * int3 handler to emulate a call instruction.
 +       */
 +      testb   $3, CS-ORIG_RAX(%rsp)
 +      jnz     .Lfrom_usermode_no_gap_\@
 +      .rept   6
 +      pushq   5*8(%rsp)
 +      .endr
 +      UNWIND_HINT_IRET_REGS offset=8
 +.Lfrom_usermode_no_gap_\@:
 +      .endif
 +
        .if \paranoid
        call    paranoid_entry
        .else
@@@ -1143,7 -1151,7 +1165,7 @@@ apicinterrupt3 HYPERV_STIMER0_VECTOR 
  #endif /* CONFIG_HYPERV */
  
  idtentry debug                        do_debug                has_error_code=0        paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
 -idtentry int3                 do_int3                 has_error_code=0
 +idtentry int3                 do_int3                 has_error_code=0        create_gap=1
  idtentry stack_segment                do_stack_segment        has_error_code=1
  
  #ifdef CONFIG_XEN_PV
index 1340fa53b575b006bbe8f301dbae7531f47e09cf,693a0ad5601989f93f41f02f1ffbb9848b8c59b6..050e5f9ebf81c198f3d9b0aabae23d65be471e76
@@@ -1,4 -1,3 +1,4 @@@
 +/* SPDX-License-Identifier: GPL-2.0-only */
  #ifndef _ASM_X86_APIC_H
  #define _ASM_X86_APIC_H
  
@@@ -53,7 -52,7 +53,7 @@@ extern unsigned int apic_verbosity
  extern int local_apic_timer_c2_ok;
  
  extern int disable_apic;
- extern unsigned int lapic_timer_frequency;
+ extern unsigned int lapic_timer_period;
  
  extern enum apic_intr_mode_id apic_intr_mode;
  enum apic_intr_mode_id {
@@@ -155,7 -154,6 +155,6 @@@ static inline int apic_force_enable(uns
  extern int apic_force_enable(unsigned long addr);
  #endif
  
- extern void apic_bsp_setup(bool upmode);
  extern void apic_ap_setup(void);
  
  /*
@@@ -175,6 -173,7 +174,7 @@@ extern void lapic_assign_system_vectors
  extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
  extern void lapic_online(void);
  extern void lapic_offline(void);
+ extern bool apic_needs_pit(void);
  
  #else /* !CONFIG_X86_LOCAL_APIC */
  static inline void lapic_shutdown(void) { }
@@@ -188,6 -187,7 +188,7 @@@ static inline void init_bsp_APIC(void) 
  static inline void apic_intr_mode_init(void) { }
  static inline void lapic_assign_system_vectors(void) { }
  static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
+ static inline bool apic_needs_pit(void) { return true; }
  #endif /* !CONFIG_X86_LOCAL_APIC */
  
  #ifdef CONFIG_X86_X2APIC
@@@ -274,6 -274,7 +275,6 @@@ struct irq_data
  
  /*
   * Copyright 2004 James Cleverdon, IBM.
 - * Subject to the GNU Public License, v.2
   *
   * Generic APIC sub-arch data struct.
   *
index 85be316665b4a38e2d650bda07db3843485a91e1,a5241b209ea5c726c522247a583fd598338ded0c..1bd91cb7b320a2363097374cd9264005e39b04a7
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   *    Local APIC handling, local APIC timers
   *
@@@ -195,7 -194,7 +195,7 @@@ static struct resource lapic_resource 
        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
  };
  
- unsigned int lapic_timer_frequency = 0;
+ unsigned int lapic_timer_period = 0;
  
  static void apic_pm_activate(void);
  
@@@ -501,7 -500,7 +501,7 @@@ lapic_timer_set_periodic_oneshot(struc
        if (evt->features & CLOCK_EVT_FEAT_DUMMY)
                return 0;
  
-       __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1);
+       __setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
        return 0;
  }
  
@@@ -805,11 -804,11 +805,11 @@@ calibrate_by_pmtimer(long deltapm, lon
  
  static int __init lapic_init_clockevent(void)
  {
-       if (!lapic_timer_frequency)
+       if (!lapic_timer_period)
                return -1;
  
        /* Calculate the scaled math multiplication factor */
-       lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+       lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
                                        TICK_NSEC, lapic_clockevent.shift);
        lapic_clockevent.max_delta_ns =
                clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
        return 0;
  }
  
+ bool __init apic_needs_pit(void)
+ {
+       /*
+        * If the frequencies are not known, PIT is required for both TSC
+        * and apic timer calibration.
+        */
+       if (!tsc_khz || !cpu_khz)
+               return true;
+       /* Is there an APIC at all? */
+       if (!boot_cpu_has(X86_FEATURE_APIC))
+               return true;
+       /* Deadline timer is based on TSC so no further PIT action required */
+       if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+               return false;
+       /* APIC timer disabled? */
+       if (disable_apic_timer)
+               return true;
+       /*
+        * The APIC timer frequency is known already, no PIT calibration
+        * required. If unknown, let the PIT be initialized.
+        */
+       return lapic_timer_period == 0;
+ }
  static int __init calibrate_APIC_clock(void)
  {
        struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
         */
        if (!lapic_init_clockevent()) {
                apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
-                           lapic_timer_frequency);
+                           lapic_timer_period);
                /*
                 * Direct calibration methods must have an always running
                 * local APIC timer, no need for broadcast timer.
        pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
                                        &delta, &deltatsc);
  
-       lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+       lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
        lapic_init_clockevent();
  
        apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
        apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
        apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
-                   lapic_timer_frequency);
+                   lapic_timer_period);
  
        if (boot_cpu_has(X86_FEATURE_TSC)) {
                apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
  
        apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
                    "%u.%04u MHz.\n",
-                   lapic_timer_frequency / (1000000 / HZ),
-                   lapic_timer_frequency % (1000000 / HZ));
+                   lapic_timer_period / (1000000 / HZ),
+                   lapic_timer_period % (1000000 / HZ));
  
        /*
         * Do a sanity check on the APIC calibration result
         */
-       if (lapic_timer_frequency < (1000000 / HZ)) {
+       if (lapic_timer_period < (1000000 / HZ)) {
                local_irq_enable();
                pr_warning("APIC frequency too slow, disabling apic timer\n");
                return -1;
@@@ -1351,6 -1377,8 +1378,8 @@@ void __init init_bsp_APIC(void
        apic_write(APIC_LVT1, value);
  }
  
+ static void __init apic_bsp_setup(bool upmode);
  /* Init the interrupt delivery mode for the BSP */
  void __init apic_intr_mode_init(void)
  {
@@@ -1464,8 -1492,7 +1493,8 @@@ static void apic_pending_intr_clear(voi
                if (queued) {
                        if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
                                ntsc = rdtsc();
 -                              max_loops = (cpu_khz << 10) - (ntsc - tsc);
 +                              max_loops = (long long)cpu_khz << 10;
 +                              max_loops -= ntsc - tsc;
                        } else {
                                max_loops--;
                        }
@@@ -2041,21 -2068,32 +2070,32 @@@ __visible void __irq_entry smp_spurious
        entering_irq();
        trace_spurious_apic_entry(vector);
  
+       inc_irq_stat(irq_spurious_count);
+       /*
+        * If this is a spurious interrupt then do not acknowledge
+        */
+       if (vector == SPURIOUS_APIC_VECTOR) {
+               /* See SDM vol 3 */
+               pr_info("Spurious APIC interrupt (vector 0xFF) on CPU#%d, should never happen.\n",
+                       smp_processor_id());
+               goto out;
+       }
        /*
-        * Check if this really is a spurious interrupt and ACK it
-        * if it is a vectored one.  Just in case...
-        * Spurious interrupts should not be ACKed.
+        * If it is a vectored one, verify it's set in the ISR. If set,
+        * acknowledge it.
         */
        v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
-       if (v & (1 << (vector & 0x1f)))
+       if (v & (1 << (vector & 0x1f))) {
+               pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Acked\n",
+                       vector, smp_processor_id());
                ack_APIC_irq();
-       inc_irq_stat(irq_spurious_count);
-       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
-       pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
-               "should never happen.\n", vector, smp_processor_id());
+       } else {
+               pr_info("Spurious interrupt (vector 0x%02x) on CPU#%d. Not pending!\n",
+                       vector, smp_processor_id());
+       }
+ out:
        trace_spurious_apic_exit(vector);
        exiting_irq();
  }
@@@ -2416,11 -2454,8 +2456,8 @@@ static void __init apic_bsp_up_setup(vo
  /**
   * apic_bsp_setup - Setup function for local apic and io-apic
   * @upmode:           Force UP mode (for APIC_init_uniprocessor)
-  *
-  * Returns:
-  * apic_id of BSP APIC
   */
- void __init apic_bsp_setup(bool upmode)
static void __init apic_bsp_setup(bool upmode)
  {
        connect_bsp_APIC();
        if (upmode)
index bf083c3f1d732d1c94bac072ff798a063819c690,65072858f5531f33826aa4056f34f35f2185383a..bbdca603f94a66a417ea6441186744a84ef22052
@@@ -1,6 -1,6 +1,6 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   * Copyright 2004 James Cleverdon, IBM.
 - * Subject to the GNU Public License, v.2
   *
   * Flat APIC subarch code.
   *
@@@ -78,7 -78,7 +78,7 @@@ flat_send_IPI_mask_allbutself(const str
        int cpu = smp_processor_id();
  
        if (cpu < BITS_PER_LONG)
-               clear_bit(cpu, &mask);
+               __clear_bit(cpu, &mask);
  
        _flat_send_IPI_mask(mask, vector);
  }
@@@ -92,7 -92,7 +92,7 @@@ static void flat_send_IPI_allbutself(in
                        unsigned long mask = cpumask_bits(cpu_online_mask)[0];
  
                        if (cpu < BITS_PER_LONG)
-                               clear_bit(cpu, &mask);
+                               __clear_bit(cpu, &mask);
  
                        _flat_send_IPI_mask(mask, vector);
                }
index e7cb78aed644b28a40c5bf9a8e7f178bc383f524,1c6d1d5f28d3fdaeb1c9e047658b2d5767ff2f13..fdacb864c3dd4f8f3756ebf715678136876b0ebc
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   * Local APIC related interfaces to support IOAPIC, MSI, etc.
   *
@@@ -6,6 -5,10 +6,6 @@@
   *    Moved from arch/x86/kernel/apic/io_apic.c.
   * Jiang Liu <jiang.liu@linux.intel.com>
   *    Enable support of hierarchical irqdomains
 - *
 - * This program is free software; you can redistribute it and/or modify
 - * it under the terms of the GNU General Public License version 2 as
 - * published by the Free Software Foundation.
   */
  #include <linux/interrupt.h>
  #include <linux/irq.h>
@@@ -340,7 -343,7 +340,7 @@@ static void clear_irq_vector(struct irq
        trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector,
                           apicd->prev_cpu);
  
-       per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
+       per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_SHUTDOWN;
        irq_matrix_free(vector_matrix, apicd->cpu, vector, managed);
        apicd->vector = 0;
  
        if (!vector)
                return;
  
-       per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
+       per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_SHUTDOWN;
        irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed);
        apicd->prev_vector = 0;
        apicd->move_in_progress = 0;
index 1e5f7a03ddf526317b4bdeea72e362ec0ebcd28f,faae6115ddefc75fad5697156c2bb8d69436ae06..062f77279ce3bbbc439d9b9754f831b1165f2b2b
@@@ -1,9 -1,13 +1,9 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   * HyperV  Detection code.
   *
   * Copyright (C) 2010, Novell, Inc.
   * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
 - *
 - * This program is free software; you can redistribute it and/or modify
 - * it under the terms of the GNU General Public License as published by
 - * the Free Software Foundation; version 2 of the License.
 - *
   */
  
  #include <linux/types.h>
@@@ -17,7 -21,6 +17,7 @@@
  #include <linux/irq.h>
  #include <linux/kexec.h>
  #include <linux/i8253.h>
 +#include <linux/random.h>
  #include <asm/processor.h>
  #include <asm/hypervisor.h>
  #include <asm/hyperv-tlfs.h>
@@@ -81,7 -84,6 +81,7 @@@ __visible void __irq_entry hv_stimer0_v
        inc_irq_stat(hyperv_stimer0_count);
        if (hv_stimer0_handler)
                hv_stimer0_handler();
 +      add_interrupt_randomness(HYPERV_STIMER0_VECTOR, 0);
        ack_APIC_irq();
  
        exiting_irq();
@@@ -91,7 -93,7 +91,7 @@@
  int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
  {
        *vector = HYPERV_STIMER0_VECTOR;
 -      *irq = 0;   /* Unused on x86/x64 */
 +      *irq = -1;   /* Unused on x86/x64 */
        hv_stimer0_handler = handler;
        return 0;
  }
@@@ -268,9 -270,9 +268,9 @@@ static void __init ms_hyperv_init_platf
  
                rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency);
                hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ);
-               lapic_timer_frequency = hv_lapic_frequency;
+               lapic_timer_period = hv_lapic_frequency;
                pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n",
-                       lapic_timer_frequency);
+                       lapic_timer_period);
        }
  
        register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST,
diff --combined arch/x86/kernel/idt.c
index d2482bbbe3d0894e3b6b55dfea4d885f065fdfa3,cc4444cb3898ca8ff8933062ddbf81d1c3b2defa..87ef69a72c52efea6ed898a3d171a7b02d3d5295
@@@ -1,6 -1,7 +1,6 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   * Interrupt descriptor table related code
 - *
 - * This file is licensed under the GPL V2
   */
  #include <linux/interrupt.h>
  
@@@ -319,7 -320,8 +319,8 @@@ void __init idt_setup_apic_and_irq_gate
  #ifdef CONFIG_X86_LOCAL_APIC
        for_each_clear_bit_from(i, system_vectors, NR_VECTORS) {
                set_bit(i, system_vectors);
-               set_intr_gate(i, spurious_interrupt);
+               entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR);
+               set_intr_gate(i, entry);
        }
  #endif
  }
diff --combined arch/x86/kernel/irq.c
index 9b68b5b00ac91cf7755c9767d8e78c1a558dacb8,a975246074b5c571e7346228553467e8f8244895..cc496eb7a8d21880cf80141ba69f5d4c759a3ba4
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  /*
   * Common interrupt code for 32 and 64 bit
   */
@@@ -247,7 -246,7 +247,7 @@@ __visible unsigned int __irq_entry do_I
        if (!handle_irq(desc, regs)) {
                ack_APIC_irq();
  
-               if (desc != VECTOR_RETRIGGERED) {
+               if (desc != VECTOR_RETRIGGERED && desc != VECTOR_SHUTDOWN) {
                        pr_emerg_ratelimited("%s: %d.%d No irq handler for vector\n",
                                             __func__, smp_processor_id(),
                                             vector);
diff --combined arch/x86/kernel/smp.c
index 4693e2f3a03ec7003e0562380142e21fd6a18899,acddd988602d38c52816c17a96c5301c3102e52d..96421f97e75cffd2297a755881e354eab0ec0394
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-or-later
  /*
   *    Intel SMP support routines.
   *
@@@ -7,6 -6,9 +7,6 @@@
   *      (c) 2002,2003 Andi Kleen, SuSE Labs.
   *
   *    i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
 - *
 - *    This code is released under the GNU General Public License version 2 or
 - *    later.
   */
  
  #include <linux/init.h>
@@@ -144,7 -146,7 +144,7 @@@ void native_send_call_func_ipi(const st
        }
  
        cpumask_copy(allbutself, cpu_online_mask);
-       cpumask_clear_cpu(smp_processor_id(), allbutself);
+       __cpumask_clear_cpu(smp_processor_id(), allbutself);
  
        if (cpumask_equal(mask, allbutself) &&
            cpumask_equal(cpu_online_mask, cpu_callout_mask))
diff --combined arch/x86/kernel/tsc.c
index 0b29e58f288e81b189a25306f76089ab832c9633,8f47c4862c56adce7a9a67ae33113a4bde5de0e9..59b57605e66cb5b3f3e84021adaf87c4c7acfe7c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
  #include <linux/kernel.h>
@@@ -632,31 -631,38 +632,38 @@@ unsigned long native_calibrate_tsc(void
  
        crystal_khz = ecx_hz / 1000;
  
-       if (crystal_khz == 0) {
-               switch (boot_cpu_data.x86_model) {
-               case INTEL_FAM6_SKYLAKE_MOBILE:
-               case INTEL_FAM6_SKYLAKE_DESKTOP:
-               case INTEL_FAM6_KABYLAKE_MOBILE:
-               case INTEL_FAM6_KABYLAKE_DESKTOP:
-                       crystal_khz = 24000;    /* 24.0 MHz */
-                       break;
-               case INTEL_FAM6_ATOM_GOLDMONT_X:
-                       crystal_khz = 25000;    /* 25.0 MHz */
-                       break;
-               case INTEL_FAM6_ATOM_GOLDMONT:
-                       crystal_khz = 19200;    /* 19.2 MHz */
-                       break;
-               }
-       }
+       /*
+        * Denverton SoCs don't report crystal clock, and also don't support
+        * CPUID.0x16 for the calculation below, so hardcode the 25MHz crystal
+        * clock.
+        */
+       if (crystal_khz == 0 &&
+                       boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT_X)
+               crystal_khz = 25000;
  
-       if (crystal_khz == 0)
-               return 0;
        /*
-        * TSC frequency determined by CPUID is a "hardware reported"
+        * TSC frequency reported directly by CPUID is a "hardware reported"
         * frequency and is the most accurate one so far we have. This
         * is considered a known frequency.
         */
-       setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+       if (crystal_khz != 0)
+               setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
+       /*
+        * Some Intel SoCs like Skylake and Kabylake don't report the crystal
+        * clock, but we can easily calculate it to a high degree of accuracy
+        * by considering the crystal ratio and the CPU speed.
+        */
+       if (crystal_khz == 0 && boot_cpu_data.cpuid_level >= 0x16) {
+               unsigned int eax_base_mhz, ebx, ecx, edx;
+               cpuid(0x16, &eax_base_mhz, &ebx, &ecx, &edx);
+               crystal_khz = eax_base_mhz * 1000 *
+                       eax_denominator / ebx_numerator;
+       }
+       if (crystal_khz == 0)
+               return 0;
  
        /*
         * For Atom SoCs TSC is the only reliable clocksource.
        if (boot_cpu_data.x86_model == INTEL_FAM6_ATOM_GOLDMONT)
                setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
  
+ #ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * The local APIC appears to be fed by the core crystal clock
+        * (which sounds entirely sensible). We can set the global
+        * lapic_timer_period here to avoid having to calibrate the APIC
+        * timer later.
+        */
+       lapic_timer_period = crystal_khz * 1000 / HZ;
+ #endif
        return crystal_khz * ebx_numerator / eax_denominator;
  }
  
@@@ -980,7 -996,7 +997,7 @@@ static int time_cpufreq_notifier(struc
                if (!(freq->flags & CPUFREQ_CONST_LOOPS))
                        mark_tsc_unstable("cpufreq changes");
  
 -              set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
 +              set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
        }
  
        return 0;
diff --combined kernel/irq/chip.c
index 04c850fb70cb8c2e3c384f4f30daf7f9528adaf6,04fe4f989bd8b4eaef30acae90b971de9bf77012..b76703b2c0af28582e29252b5901578d9aeed532
@@@ -314,6 -314,12 +314,12 @@@ void irq_shutdown(struct irq_desc *desc
                }
                irq_state_clr_started(desc);
        }
+ }
+ void irq_shutdown_and_deactivate(struct irq_desc *desc)
+ {
+       irq_shutdown(desc);
        /*
         * This must be called even if the interrupt was never started up,
         * because the activation can happen before the interrupt is
@@@ -748,8 -754,6 +754,8 @@@ void handle_fasteoi_nmi(struct irq_des
        unsigned int irq = irq_desc_get_irq(desc);
        irqreturn_t res;
  
 +      __kstat_incr_irqs_this_cpu(desc);
 +
        trace_irq_handler_entry(irq, action);
        /*
         * NMIs cannot be shared, there is only one action.
@@@ -964,8 -968,6 +970,8 @@@ void handle_percpu_devid_fasteoi_nmi(st
        unsigned int irq = irq_desc_get_irq(desc);
        irqreturn_t res;
  
 +      __kstat_incr_irqs_this_cpu(desc);
 +
        trace_irq_handler_entry(irq, action);
        res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
        trace_irq_handler_exit(irq, action, res);
@@@ -1463,33 -1465,6 +1469,33 @@@ int irq_chip_set_wake_parent(struct irq
        return -ENOSYS;
  }
  EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent);
 +
 +/**
 + * irq_chip_request_resources_parent - Request resources on the parent interrupt
 + * @data:     Pointer to interrupt specific data
 + */
 +int irq_chip_request_resources_parent(struct irq_data *data)
 +{
 +      data = data->parent_data;
 +
 +      if (data->chip->irq_request_resources)
 +              return data->chip->irq_request_resources(data);
 +
 +      return -ENOSYS;
 +}
 +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent);
 +
 +/**
 + * irq_chip_release_resources_parent - Release resources on the parent interrupt
 + * @data:     Pointer to interrupt specific data
 + */
 +void irq_chip_release_resources_parent(struct irq_data *data)
 +{
 +      data = data->parent_data;
 +      if (data->chip->irq_release_resources)
 +              data->chip->irq_release_resources(data);
 +}
 +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
  #endif
  
  /**
diff --combined kernel/irq/internals.h
index 21f9927ff5ad6c7e52b2e0b993d28cb08034e3e9,3a948f41ab00565417b840bb9092480876efd1be..3924fbe829d4a8aeaa32750580212b0f008d7f39
@@@ -82,6 -82,7 +82,7 @@@ extern int irq_activate_and_startup(str
  extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
  
  extern void irq_shutdown(struct irq_desc *desc);
+ extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
  extern void irq_enable(struct irq_desc *desc);
  extern void irq_disable(struct irq_desc *desc);
  extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
@@@ -96,6 -97,10 +97,10 @@@ static inline void irq_mark_irq(unsigne
  extern void irq_mark_irq(unsigned int irq);
  #endif
  
+ extern int __irq_get_irqchip_state(struct irq_data *data,
+                                  enum irqchip_irq_state which,
+                                  bool *state);
  extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
  
  irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
@@@ -354,16 -359,6 +359,16 @@@ static inline int irq_timing_decode(u6
        return value & U16_MAX;
  }
  
 +static __always_inline void irq_timings_push(u64 ts, int irq)
 +{
 +      struct irq_timings *timings = this_cpu_ptr(&irq_timings);
 +
 +      timings->values[timings->count & IRQ_TIMINGS_MASK] =
 +              irq_timing_encode(ts, irq);
 +
 +      timings->count++;
 +}
 +
  /*
   * The function record_irq_time is only called in one place in the
   * interrupts handler. We want this function always inline so the code
@@@ -377,8 -372,15 +382,8 @@@ static __always_inline void record_irq_
        if (!static_branch_likely(&irq_timing_enabled))
                return;
  
 -      if (desc->istate & IRQS_TIMINGS) {
 -              struct irq_timings *timings = this_cpu_ptr(&irq_timings);
 -
 -              timings->values[timings->count & IRQ_TIMINGS_MASK] =
 -                      irq_timing_encode(local_clock(),
 -                                        irq_desc_get_irq(desc));
 -
 -              timings->count++;
 -      }
 +      if (desc->istate & IRQS_TIMINGS)
 +              irq_timings_push(local_clock(), irq_desc_get_irq(desc));
  }
  #else
  static inline void irq_remove_timings(struct irq_desc *desc) {}
diff --combined kernel/irq/manage.c
index 78f3ddeb7fe44a297229ebd65f28e591f4e3a56f,fad61986f35cbee72ad42ad401b452b81cddb5e7..e8f7f179bf77e6a721deaaf349462edbc14ef8f3
@@@ -13,6 -13,7 +13,7 @@@
  #include <linux/module.h>
  #include <linux/random.h>
  #include <linux/interrupt.h>
+ #include <linux/irqdomain.h>
  #include <linux/slab.h>
  #include <linux/sched.h>
  #include <linux/sched/rt.h>
@@@ -34,8 -35,9 +35,9 @@@ static int __init setup_forced_irqthrea
  early_param("threadirqs", setup_forced_irqthreads);
  #endif
  
- static void __synchronize_hardirq(struct irq_desc *desc)
+ static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
  {
+       struct irq_data *irqd = irq_desc_get_irq_data(desc);
        bool inprogress;
  
        do {
                /* Ok, that indicated we're done: double-check carefully. */
                raw_spin_lock_irqsave(&desc->lock, flags);
                inprogress = irqd_irq_inprogress(&desc->irq_data);
+               /*
+                * If requested and supported, check at the chip whether it
+                * is in flight at the hardware level, i.e. already pending
+                * in a CPU and waiting for service and acknowledge.
+                */
+               if (!inprogress && sync_chip) {
+                       /*
+                        * Ignore the return code. inprogress is only updated
+                        * when the chip supports it.
+                        */
+                       __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+                                               &inprogress);
+               }
                raw_spin_unlock_irqrestore(&desc->lock, flags);
  
                /* Oops, that failed? */
   *    Returns: false if a threaded handler is active.
   *
   *    This function may be called - with care - from IRQ context.
+  *
+  *    It does not check whether there is an interrupt in flight at the
+  *    hardware level, but not serviced yet, as this might deadlock when
+  *    called with interrupts disabled and the target CPU of the interrupt
+  *    is the current CPU.
   */
  bool synchronize_hardirq(unsigned int irq)
  {
        struct irq_desc *desc = irq_to_desc(irq);
  
        if (desc) {
-               __synchronize_hardirq(desc);
+               __synchronize_hardirq(desc, false);
                return !atomic_read(&desc->threads_active);
        }
  
@@@ -95,14 -116,19 +116,19 @@@ EXPORT_SYMBOL(synchronize_hardirq)
   *    to complete before returning. If you use this function while
   *    holding a resource the IRQ handler may need you will deadlock.
   *
-  *    This function may be called - with care - from IRQ context.
+  *    Can only be called from preemptible code as it might sleep when
+  *    an interrupt thread is associated to @irq.
+  *
+  *    It optionally makes sure (when the irq chip supports that method)
+  *    that the interrupt is not pending in any CPU and waiting for
+  *    service.
   */
  void synchronize_irq(unsigned int irq)
  {
        struct irq_desc *desc = irq_to_desc(irq);
  
        if (desc) {
-               __synchronize_hardirq(desc);
+               __synchronize_hardirq(desc, true);
                /*
                 * We made sure that no hardirq handler is
                 * running. Now verify that no threaded handlers are
@@@ -781,7 -807,7 +807,7 @@@ int __irq_set_trigger(struct irq_desc *
                ret = 0;
                break;
        default:
 -              pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
 +              pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n",
                       flags, irq_desc_get_irq(desc), chip->irq_set_type);
        }
        if (unmask)
@@@ -1699,6 -1725,7 +1725,7 @@@ static struct irqaction *__free_irq(str
        /* If this was the last handler, shut down the IRQ line: */
        if (!desc->action) {
                irq_settings_clr_disable_unlazy(desc);
+               /* Only shutdown. Deactivate after synchronize_hardirq() */
                irq_shutdown(desc);
        }
  
  
        unregister_handler_proc(irq, action);
  
-       /* Make sure it's not being used on another CPU: */
-       synchronize_hardirq(irq);
+       /*
+        * Make sure it's not being used on another CPU and if the chip
+        * supports it also make sure that there is no (not yet serviced)
+        * interrupt in flight at the hardware level.
+        */
+       __synchronize_hardirq(desc, true);
  
  #ifdef CONFIG_DEBUG_SHIRQ
        /*
                 * require it to deallocate resources over the slow bus.
                 */
                chip_bus_lock(desc);
+               /*
+                * There is no interrupt on the fly anymore. Deactivate it
+                * completely.
+                */
+               raw_spin_lock_irqsave(&desc->lock, flags);
+               irq_domain_deactivate_irq(&desc->irq_data);
+               raw_spin_unlock_irqrestore(&desc->lock, flags);
                irq_release_resources(desc);
                chip_bus_sync_unlock(desc);
                irq_remove_timings(desc);
@@@ -1855,7 -1894,7 +1894,7 @@@ static const void *__cleanup_nmi(unsign
        }
  
        irq_settings_clr_disable_unlazy(desc);
-       irq_shutdown(desc);
+       irq_shutdown_and_deactivate(desc);
  
        irq_release_resources(desc);
  
        irq_put_desc_unlock(desc, flags);
  }
  
+ int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
+                           bool *state)
+ {
+       struct irq_chip *chip;
+       int err = -EINVAL;
+       do {
+               chip = irq_data_get_irq_chip(data);
+               if (chip->irq_get_irqchip_state)
+                       break;
+ #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+               data = data->parent_data;
+ #else
+               data = NULL;
+ #endif
+       } while (data);
+       if (data)
+               err = chip->irq_get_irqchip_state(data, which, state);
+       return err;
+ }
  /**
   *    irq_get_irqchip_state - returns the irqchip state of a interrupt.
   *    @irq: Interrupt line that is forwarded to a VM
@@@ -2596,7 -2657,6 +2657,6 @@@ int irq_get_irqchip_state(unsigned int 
  {
        struct irq_desc *desc;
        struct irq_data *data;
-       struct irq_chip *chip;
        unsigned long flags;
        int err = -EINVAL;
  
  
        data = irq_desc_get_irq_data(desc);
  
-       do {
-               chip = irq_data_get_irq_chip(data);
-               if (chip->irq_get_irqchip_state)
-                       break;
- #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
-               data = data->parent_data;
- #else
-               data = NULL;
- #endif
-       } while (data);
-       if (data)
-               err = chip->irq_get_irqchip_state(data, which, state);
+       err = __irq_get_irqchip_state(data, which, state);
  
        irq_put_desc_busunlock(desc, flags);
        return err;