Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
author    David S. Miller <davem@davemloft.net>
          Thu, 18 Jan 2018 14:17:04 +0000 (09:17 -0500)
committer David S. Miller <davem@davemloft.net>
          Thu, 18 Jan 2018 14:17:04 +0000 (09:17 -0500)
Daniel Borkmann says:

====================
pull-request: bpf 2018-01-18

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a divide by zero due to a wrong if (src_reg == 0) check in
   64-bit mode. Properly handle this in the interpreter and also
   mask it generically in the verifier to guard against similar
   checks in JITs, from Eric and Alexei (see the sketch below).

2) Fix a bug in arm64 JIT when tail calls are involved and progs
   have different stack sizes, from Daniel.

3) Reject stores into BPF context that are not the expected BPF_STX |
   BPF_MEM variant, from Daniel.

4) Mark dst reg as unknown on {s,u}bounds adjustments when the
   src reg has derived bounds from dead branches, from Daniel.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
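
As context for item 1 above: a minimal user-space sketch of the failure
mode (illustrative only, not the kernel's actual interpreter code). The
zero check ran on the full 64-bit source register while the 32-bit ALU
division used only the low 32 bits, so a divisor such as 0x100000000
passed the check yet truncated to zero.

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the buggy 32-bit BPF division path. */
    static uint32_t alu32_div(uint64_t dst, uint64_t src)
    {
            /* buggy form: if (src == 0) -- misses src = 1ULL << 32 */
            if ((uint32_t)src == 0)  /* fixed: test the truncated divisor */
                    return 0;
            return (uint32_t)dst / (uint32_t)src;
    }

    int main(void)
    {
            /* 0x100000000 is non-zero as a u64 but zero as a u32 */
            printf("%u\n", alu32_div(42, 0x100000000ULL));
            return 0;
    }
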
179 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/kernel-parameters.txt
Documentation/filesystems/nilfs2.txt
Documentation/kbuild/kconfig-language.txt
Documentation/usb/gadget-testing.txt
Documentation/x86/pti.txt [new file with mode: 0644]
MAINTAINERS
Makefile
arch/ia64/kernel/time.c
arch/powerpc/include/asm/exception-64e.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/plpar_wrappers.h
arch/powerpc/include/asm/setup.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_rmhandlers.S
arch/powerpc/kvm/book3s_segment.S
arch/powerpc/lib/feature-fixups.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/platforms/pseries/setup.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/camellia-aesni-avx-asm_64.S
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/events/intel/bts.c
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h [new file with mode: 0644]
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/kernel/alternative.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/ftrace_32.S
arch/x86/kernel/ftrace_64.S
arch/x86/kernel/irq_32.c
arch/x86/kernel/tboot.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/lib/Makefile
arch/x86/lib/checksum_32.S
arch/x86/lib/retpoline.S [new file with mode: 0644]
arch/x86/mm/pti.c
arch/x86/pci/common.c
arch/x86/pci/fixup.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/intel-mid/device_libs/platform_bt.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/xen-ops.h
crypto/algapi.c
drivers/base/Kconfig
drivers/base/cpu.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
drivers/gpu/drm/tegra/sor.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/s3cmci.c
drivers/mux/core.c
drivers/net/can/usb/peak_usb/pcan_usb_fd.c
drivers/net/ethernet/cirrus/cs89x0.c
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/fs_enet/fs_enet.h
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ppp/ppp_generic.c
drivers/net/tun.c
drivers/net/usb/lan78xx.c
drivers/net/usb/r8152.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvme/host/fabrics.c
drivers/staging/android/ashmem.c
drivers/usb/gadget/udc/core.c
drivers/usb/misc/usb3503.c
drivers/usb/mon/mon_bin.c
drivers/usb/serial/cp210x.c
drivers/usb/storage/unusual_uas.h
drivers/usb/usbip/usbip_common.c
drivers/usb/usbip/vudc_rx.c
drivers/usb/usbip/vudc_tx.c
drivers/xen/gntdev.c
include/linux/completion.h
include/linux/cpu.h
include/linux/crash_core.h
include/linux/irqflags.h
include/linux/lockdep.h
include/linux/netlink.h
include/linux/ptr_ring.h
include/net/arp.h
include/net/cfg80211.h
include/net/sch_generic.h
include/net/tls.h
include/uapi/linux/openvswitch.h
init/Kconfig
kernel/crash_core.c
kernel/sched/completion.c
kernel/sched/membarrier.c
kernel/trace/Kconfig
kernel/trace/ring_buffer.c
mm/kmemleak.c
net/9p/trans_xen.c
net/bluetooth/l2cap_core.c
net/core/neighbour.c
net/ipv4/arp.c
net/ipv4/route.c
net/ipv6/ip6_output.c
net/netlink/af_netlink.c
net/openvswitch/flow_netlink.c
net/sched/cls_bpf.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sched/sch_ingress.c
net/sctp/ipv6.c
net/sctp/outqueue.c
net/sctp/socket.c
net/tipc/node.c
net/tls/tls_main.c
net/tls/tls_sw.c
net/wireless/core.c
net/wireless/core.h
net/wireless/nl80211.c
net/wireless/reg.c
scripts/genksyms/.gitignore
scripts/kconfig/expr.c
security/Kconfig
security/apparmor/domain.c
security/apparmor/include/perms.h
security/apparmor/ipc.c
sound/core/pcm_lib.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_clientmgr.h
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_realtek.c
tools/objtool/Makefile
tools/objtool/check.c
tools/objtool/check.h
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/test_vsyscall.c [new file with mode: 0644]

index d6d862db3b5d65fe09c26783298bba21ceec5558..bfd29bc8d37af1bbc4913deee9d941b1692bedda 100644 (file)
@@ -375,3 +375,19 @@ Contact:   Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   information about CPUs heterogeneity.
 
                cpu_capacity: capacity of cpu#.
+
+What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/spectre_v1
+               /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:          January 2018
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:   Information about CPU vulnerabilities
+
+               The files are named after the code names of CPU
+               vulnerabilities. The output of those files reflects the
+               state of the CPUs in the system. Possible output values:
+
+               "Not affected"    CPU is not affected by the vulnerability
+               "Vulnerable"      CPU is affected and no mitigation in effect
+               "Mitigation: $M"  CPU is affected and mitigation $M is in effect
index af7104aaffd92f1eb3db02df44ffc33e7df78adc..46b26bfee27ba81267703a1ceac7c0e082f1e5ec 100644 (file)
                        It will be ignored when crashkernel=X,high is not used
                        or memory reserved is below 4G.
 
-       crossrelease_fullstack
-                       [KNL] Allow to record full stack trace in cross-release
-
        cryptomgr.notests
                         [KNL] Disable crypto self-tests
 
        nosmt           [KNL,S390] Disable symmetric multithreading (SMT).
                        Equivalent to smt=1.
 
+       nospectre_v2    [X86] Disable all mitigations for the Spectre variant 2
+                       (indirect branch prediction) vulnerability. System may
+                       allow data leaks with this option, which is equivalent
+                       to spectre_v2=off.
+
        noxsave         [BUGS=X86] Disables x86 extended register state save
                        and restore using xsave. The kernel will fallback to
                        enabling legacy floating-point and sse state.
                        steal time is computed, but won't influence scheduler
                        behaviour
 
-       nopti           [X86-64] Disable kernel page table isolation
-
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
                pcie_scan_all   Scan all possible PCIe devices.  Otherwise we
                                only look for one device below a PCIe downstream
                                port.
+               big_root_window Try to add a big 64bit memory window to the PCIe
+                               root complex on AMD CPUs. Some GFX hardware
+                               can resize a BAR to allow access to all VRAM.
+                               Adding the window is slightly risky (it may
+                               conflict with unreported devices), so this
+                               taints the kernel.
 
        pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
                        Management.
        pt.             [PARIDE]
                        See Documentation/blockdev/paride.txt.
 
-       pti=            [X86_64]
-                       Control user/kernel address space isolation:
-                       on - enable
-                       off - disable
-                       auto - default setting
+       pti=            [X86_64] Control Page Table Isolation of user and
+                       kernel address spaces.  Disabling this feature
+                       removes hardening, but improves performance of
+                       system calls and interrupts.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable to issues that PTI mitigates
+
+                       Not specifying this option is equivalent to pti=auto.
+
+       nopti           [X86_64]
+                       Equivalent to pti=off
 
        pty.legacy_count=
                        [KNL] Number of legacy pty's. Overwrites compiled-in
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/laptops/sonypi.txt
 
+       spectre_v2=     [X86] Control mitigation of Spectre variant 2
+                       (indirect branch speculation) vulnerability.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable
+
+                       Selecting 'on' will, and 'auto' may, choose a
+                       mitigation method at run time according to the
+                       CPU, the available microcode, the setting of the
+                       CONFIG_RETPOLINE configuration option, and the
+                       compiler with which the kernel was built.
+
+                       Specific mitigations can also be selected manually:
+
+                       retpoline         - replace indirect branches
+                       retpoline,generic - Google's original retpoline
+                       retpoline,amd     - AMD-specific minimal thunk
+
+                       Not specifying this option is equivalent to
+                       spectre_v2=auto.
+
        spia_io_base=   [HW,MTD]
        spia_fio_base=
        spia_pedr=
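
The pti= and spectre_v2= entries above share the same shape: an explicit
on/off overrides auto, and a legacy alias (nopti, nospectre_v2) maps to
=off. A simplified decision table for pti=, as a sketch only (the
kernel's real parser tokenizes the command line rather than substring
matching):

    #include <stdio.h>
    #include <string.h>

    enum pti_mode { PTI_AUTO, PTI_FORCE_ON, PTI_FORCE_OFF };

    static enum pti_mode pti_mode_from_cmdline(const char *cmdline)
    {
            /* naive substring matching, acceptable for a sketch */
            if (strstr(cmdline, "nopti") || strstr(cmdline, "pti=off"))
                    return PTI_FORCE_OFF;  /* nopti is equivalent to pti=off */
            if (strstr(cmdline, "pti=on"))
                    return PTI_FORCE_ON;
            return PTI_AUTO;  /* absent or pti=auto: decide per CPU model */
    }

    int main(void)
    {
            printf("%d\n", pti_mode_from_cmdline("root=/dev/sda1 nopti"));
            return 0;
    }
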
index c0727dc36271e99eaf844c205fff6d901c3d057b..f2f3f8592a6f5e12bdcd2d56005deaf52030d0f7 100644 (file)
@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 cleaner or garbage collector) are required.  Details on the tools are
 described in the man pages included in the package.
 
-Project web page:    http://nilfs.sourceforge.net/
-Download page:       http://nilfs.sourceforge.net/en/download.html
+Project web page:    https://nilfs.sourceforge.io/
+Download page:       https://nilfs.sourceforge.io/en/download.html
 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs
 
 Caveats
index 262722d8867b2aa9f70f137529dd298e5e36af60..c4a293a03c337f5080d9cf682f64428fc74a1d79 100644 (file)
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                             (1)
            <symbol> '=' <symbol>                (2)
            <symbol> '!=' <symbol>               (3)
-           '(' <expr> ')'                       (4)
-           '!' <expr>                           (5)
-           <expr> '&&' <expr>                   (6)
-           <expr> '||' <expr>                   (7)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
 
 Expressions are listed in decreasing order of precedence. 
 
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If the value of <symbol1> is respectively lower than, greater than,
+    lower-or-equal to, or greater-or-equal to the value of <symbol2>,
+    it returns 'y', otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its
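
The tri-state arithmetic behind rules (6)-(8) is compact enough to show
directly; a quick sketch using the n=0, m=1, y=2 mapping from the
paragraph above:

    #include <stdio.h>

    enum tristate { N = 0, M = 1, Y = 2 };

    static int ts_and(int a, int b) { return a < b ? a : b; }  /* '&&' is min() */
    static int ts_or(int a, int b)  { return a > b ? a : b; }  /* '||' is max() */
    static int ts_not(int a)        { return 2 - a; }          /* '!' is 2 - expr */

    int main(void)
    {
            /* m && y -> m,  n || m -> m,  !m -> m */
            printf("%d %d %d\n", ts_and(M, Y), ts_or(N, M), ts_not(M));
            return 0;
    }
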
index 441a4b9b666fbb2b2aace2cbc2f5ab542635d553..5908a21fddb6035cd25bb759bfd01ee405371163 100644 (file)
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an interval value
 in each line. The rules stated above are best illustrated with an example:
 
 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644 (file)
index 0000000..d11eff6
--- /dev/null
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications.  When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy.  When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT).  There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled.  It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI.  This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level.  This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel.  This data is entirely contained in the 'struct
+cpu_entry_area' structure, which is placed in the fixmap, giving
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal.  The only difference is when the kernel
+makes entries in the top (PGD) level.  In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables.  This leaves a single, shared set of
+userspace page tables to manage.  One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important.  But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry.  This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.)  Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry.  This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, so requires mapping fewer
+     things into the userspace page tables.  The downside is
+     that stacks must be switched at entry time.
+  c. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables.  This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel.  Losing the feature means more
+     TLB misses after a context switch.  The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed.  This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper.  But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB.  The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process.  Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process.  But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures.  At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace.  This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB.  That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs.  Some systems support
+     PCIDs, but do not support INVPCID.  On these systems, addresses
+     can only be flushed from the TLB for the current PCID.  When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not writing to CR3 unless its
+   value has actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes.  These tests frequently uncover corner cases in the
+   kernel entry code.  In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts).  This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+       while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code.  Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup.  Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
+   like screwing up a page table switch.  Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI.  The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts.  Also caused by incorrectly mapping NMI
+   code.  NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace.  entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults.  Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs.  These have
+   tended to be TLB invalidation issues.  Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
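
Returning to the "Page Table Management" section above: a toy model of
the PGD mirroring it describes (the array size and the helper are
assumptions for illustration, not kernel code). Userspace entries are
written into both copies, so the two page tables share the same
lower-level tables:

    #define PTRS_PER_PGD 512

    static unsigned long kernel_pgd[PTRS_PER_PGD];
    static unsigned long user_pgd[PTRS_PER_PGD];  /* the PTI "user" copy */

    /* hypothetical set_pgd()-style helper */
    static void set_pgd_model(int index, unsigned long entry, int user_address)
    {
            kernel_pgd[index] = entry;
            if (user_address)
                    user_pgd[index] = entry;  /* shares all lower layers */
    }

    int main(void)
    {
            set_pgd_model(0, 0x1000, 1);    /* user mapping: both copies */
            set_pgd_model(256, 0x2000, 0);  /* kernel mapping: kernel copy only */
            return kernel_pgd[0] == user_pgd[0] ? 0 : 1;
    }
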
index d76af75a653afcfeece6f27bc1290313528f5a98..18994806e441f86f2415e417458a02dc111fa6d2 100644 (file)
@@ -9638,8 +9638,8 @@ F:        include/uapi/linux/sunrpc/
 NILFS2 FILESYSTEM
 M:     Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
 L:     linux-nilfs@vger.kernel.org
-W:     http://nilfs.sourceforge.net/
-W:     http://nilfs.osdn.jp/
+W:     https://nilfs.sourceforge.io/
+W:     https://nilfs.osdn.jp/
 T:     git git://github.com/konis/nilfs2.git
 S:     Supported
 F:     Documentation/filesystems/nilfs2.txt
index eb59638035dd6fe1d0e7aac80b1357aae565c7eb..bf5b8cbb9469db3ab4a42e636746cab82a093044 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC       := --gcc-toolchain=$(GCC_TOOLCHAIN)
 endif
 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as an optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not one of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
 
 ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# CLANG uses a _MergedGlobals as an optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not one of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
index c6ecb97151a25774bf75ce5a1663210db4f6bce3..9025699049ca63b3834ed917d935224009d2d7a8 100644 (file)
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
        }
 
        if (ti->softirq_time) {
-               delta = cycle_to_nsec(ti->softirq_time));
+               delta = cycle_to_nsec(ti->softirq_time);
                account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
        }
 
index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644 (file)
@@ -209,5 +209,11 @@ exc_##label##_book3e:
        ori     r3,r3,vector_offset@l;          \
        mtspr   SPRN_IVOR##vector_number,r3;
 
+#define RFI_TO_KERNEL                                                  \
+       rfi
+
+#define RFI_TO_USER                                                    \
+       rfi
+
 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
 
index b27205297e1d9ca5ca46db9e7189bf9187385803..7197b179c1b150ba819f13a1f7feb6c9a45da1f9 100644 (file)
  */
 #define EX_R3          EX_DAR
 
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT                                                 \
+       RFI_FLUSH_FIXUP_SECTION;                                        \
+       nop;                                                            \
+       nop;                                                            \
+       nop
+
+#define RFI_TO_KERNEL                                                  \
+       rfid
+
+#define RFI_TO_USER                                                    \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL                                          \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_GUEST                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define HRFI_TO_KERNEL                                                 \
+       hrfid
+
+#define HRFI_TO_USER                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL                                         \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_GUEST                                                  \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN                                                        \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)                   \
        mfspr   r11,SPRN_##h##SRR0;     /* save SRR0 */                 \
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)                           \
        __EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 
 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)                      \
index 8f88f771cc55ce982c11599b273e584babd38600..1e82eb3caabd19c69289957da188b563d0bcd0d6 100644 (file)
@@ -187,7 +187,20 @@ label##3:                                          \
        FTR_ENTRY_OFFSET label##1b-label##3b;           \
        .popsection;
 
+#define RFI_FLUSH_FIXUP_SECTION                                \
+951:                                                   \
+       .pushsection __rfi_flush_fixup,"a";             \
+       .align 2;                                       \
+952:                                                   \
+       FTR_ENTRY_OFFSET 951b-952b;                     \
+       .popsection;
+
+
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
index a409177be8bdfd5f717af6111700d088fb91e61b..f0461618bf7bee95ffd27874e33501bd41ef80a4 100644 (file)
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING         0x1D8
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS          -2
 /* >= 0 values are CPU number */
 
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31      (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED   (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30     (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2     (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV     (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY    (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR       (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR  (1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK     0x18
 #define PROC_TABLE_DEREG       0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
        }
 }
 
+struct h_cpu_char_result {
+       u64 character;
+       u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
index 3892db93b8374e1f1256a6b497d384f3faedc06e..23ac7fc0af23b6cae8f4706665102dd7e2050667 100644 (file)
@@ -232,6 +232,16 @@ struct paca_struct {
        struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * rfi fallback flush must be in its own cacheline to prevent
+        * other paca data leaking into the L1d
+        */
+       u64 exrfi[EX_SIZE] __aligned(0x80);
+       void *rfi_flush_fallback_area;
+       u64 l1d_flush_congruence;
+       u64 l1d_flush_sets;
+#endif
 };
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
index 7f01b22fa6cb0d0895c914423a98cb2361fb2a16..55eddf50d1498020ba7f17216c689ab89b84f3c7 100644 (file)
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
        return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
 
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+       unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+       if (rc == H_SUCCESS) {
+               p->character = retbuf[0];
+               p->behaviour = retbuf[1];
+       }
+
+       return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
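
A sketch of how a platform setup path might translate the returned
characteristics into L1D flush types (the constants are from the
hvcall.h hunk above, the flag values mirror the l1d_flush_type enum in
the setup.h hunk below, and the mapping itself is illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define H_CPU_CHAR_L1D_FLUSH_ORI30  (1ull << 61)  /* IBM bit 2 */
    #define H_CPU_CHAR_L1D_FLUSH_TRIG2  (1ull << 60)  /* IBM bit 3 */
    #define H_CPU_BEHAV_L1D_FLUSH_PR    (1ull << 62)  /* IBM bit 1 */

    int main(void)
    {
            /* pretend these came back from plpar_get_cpu_characteristics() */
            uint64_t character = H_CPU_CHAR_L1D_FLUSH_TRIG2;
            uint64_t behaviour = H_CPU_BEHAV_L1D_FLUSH_PR;
            unsigned int types = 0;

            if (character & H_CPU_CHAR_L1D_FLUSH_ORI30)
                    types |= 0x4;  /* L1D_FLUSH_ORI */
            if (character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
                    types |= 0x8;  /* L1D_FLUSH_MTTRIG */

            /* flush on return to userspace only if the hypervisor asks */
            printf("types=%#x enable=%d\n", types,
                   !!(behaviour & H_CPU_BEHAV_L1D_FLUSH_PR));
            return 0;
    }
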
index cf00ec26303aef1415c7d5fcd7425f2149ac0295..469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4 100644 (file)
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */
 
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+       L1D_FLUSH_NONE          = 0x1,
+       L1D_FLUSH_FALLBACK      = 0x2,
+       L1D_FLUSH_ORI           = 0x4,
+       L1D_FLUSH_MTTRIG        = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_SETUP_H */
index 6b958414b4e036ac1e4c97bceb61277ffab65e76..f390d57cf2e1a711335bbd66cf7819e9dcd8442f 100644 (file)
@@ -237,6 +237,11 @@ int main(void)
        OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
        OFFSET(PACA_IN_MCE, paca_struct, in_mce);
        OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+       OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+       OFFSET(PACA_EXRFI, paca_struct, exrfi);
+       OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+       OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
 #endif
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
index 3320bcac71928eeaf85b884076e90767be4d264f..2748584b767da3f5d788c0be27b27662053853e4 100644 (file)
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 /*
  * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        ld      r13,GPR13(r1)   /* only restore r13 if returning to usermode */
+       ld      r2,GPR2(r1)
+       ld      r1,GPR1(r1)
+       mtlr    r4
+       mtcr    r5
+       mtspr   SPRN_SRR0,r7
+       mtspr   SPRN_SRR1,r8
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+
+       /* exit to kernel */
 1:     ld      r2,GPR2(r1)
        ld      r1,GPR1(r1)
        mtlr    r4
        mtcr    r5
        mtspr   SPRN_SRR0,r7
        mtspr   SPRN_SRR1,r8
-       RFI
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 .Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        mtmsrd  r10, 1
        mtspr   SPRN_SRR0, r11
        mtspr   SPRN_SRR1, r12
-
-       rfid
+       RFI_TO_USER
        b       .       /* prevent speculative execution */
 #endif
 _ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
        REST_GPR(13, r1)
-1:
+
        mtspr   SPRN_SRR1,r3
 
        ld      r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ld      r3,GPR3(r1)
        ld      r4,GPR4(r1)
        ld      r1,GPR1(r1)
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
 
-       rfid
+1:     mtspr   SPRN_SRR1,r3
+
+       ld      r2,_CCR(r1)
+       mtcrf   0xFF,r2
+       ld      r2,_NIP(r1)
+       mtspr   SPRN_SRR0,r2
+
+       ld      r0,GPR0(r1)
+       ld      r2,GPR2(r1)
+       ld      r3,GPR3(r1)
+       ld      r4,GPR4(r1)
+       ld      r1,GPR1(r1)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 #endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
        
        mtspr   SPRN_SRR0,r5
        mtspr   SPRN_SRR1,r6
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
        mtspr   SPRN_SRR0,r3
        mtspr   SPRN_SRR1,r4
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
        LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
        andc    r11,r11,r12
        mtsrr1  r11
-       rfid
+       RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */
 
 1:     /* Return from OF */
index e441b469dc8f61f6381c7c95b086b677004d401b..2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d 100644 (file)
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
        LOAD_HANDLER(r12, machine_check_handle_early)
 1:     mtspr   SPRN_SRR0,r12
        mtspr   SPRN_SRR1,r11
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 2:
        /* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
        li      r3,MSR_ME
        andc    r10,r10,r3              /* Turn off MSR_ME */
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 2:
        /*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         */
        bl      machine_check_queue_event
        MACHINE_CHECK_HANDLER_WINDUP
-       rfid
+       RFI_TO_USER_OR_KERNEL
 9:
        /* Deliver the machine check to host kernel in V mode. */
        MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
        stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
        std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
 
+       andi.   r9,r11,MSR_PR   // Check for exception from userspace
+       cmpdi   cr4,r9,MSR_PR   // And save the result in CR4 for later
+
        /*
         * Test MSR_RI before calling slb_allocate_realmode, because the
         * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
        /* All done -- return from exception. */
 
+       bne     cr4,1f          /* returning to kernel */
+
 .machine       push
 .machine       "power4"
        mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
        mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
        mtcrf   0x02,r9         /* I/D indication is in cr6 */
        mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        ld      r11,PACA_EXSLB+EX_R11(r13)
        ld      r12,PACA_EXSLB+EX_R12(r13)
        ld      r13,PACA_EXSLB+EX_R13(r13)
-       rfid
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+1:
+.machine       push
+.machine       "power4"
+       mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
+       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
+       mtcrf   0x02,r9         /* I/D indication is in cr6 */
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+.machine       pop
+
+       RESTORE_CTR(r9, PACA_EXSLB)
+       RESTORE_PPR_PACA(PACA_EXSLB, r9)
+       mr      r3,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
+
 2:     std     r3,PACA_EXSLB+EX_DAR(r13)
        mr      r3,r12
        mfspr   r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 8:     std     r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
        mtspr   SPRN_SRR0,r10 ;                                 \
        ld      r10,PACAKMSR(r13) ;                             \
        mtspr   SPRN_SRR1,r10 ;                                 \
-       rfid ;                                                  \
+       RFI_TO_KERNEL ;                                         \
        b       . ;     /* prevent speculative execution */
 
 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)                              \
        xori    r12,r12,MSR_LE ;                                \
        mtspr   SPRN_SRR1,r12 ;                                 \
        mr      r13,r9 ;                                        \
-       rfid ;          /* return to userspace */               \
+       RFI_TO_USER ;   /* return to userspace */               \
        b       . ;     /* prevent speculative execution */
 #else
 #define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
        mtcr    r11
        REST_GPR(11, r1)
        ld      r1,GPR1(r1)
-       hrfid
+       HRFI_TO_USER_OR_KERNEL
 
 1:     mtcr    r11
        REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        ld      r11,PACA_EXGEN+EX_R11(r13)
        ld      r12,PACA_EXGEN+EX_R12(r13)
        ld      r13,PACA_EXGEN+EX_R13(r13)
-       HRFID
+       HRFI_TO_UNKNOWN
        b       .
 #endif
 
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:                                 \
        ld      r10,PACA_EXGEN+EX_R10(r13);             \
        ld      r11,PACA_EXGEN+EX_R11(r13);             \
        /* returns to kernel where r13 must be set up, so don't restore it */ \
-       ##_H##rfid;                                     \
+       ##_H##RFI_TO_KERNEL;                            \
        b       .;                                      \
        MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       hrfid
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
        addi    r13, r13, 4
        mtspr   SPRN_SRR0, r13
        GET_SCRATCH0(r13)
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
        addi    r13, r13, 4
        mtspr   SPRN_HSRR0, r13
        GET_SCRATCH0(r13)
-       hrfid
+       HRFI_TO_KERNEL
        b       .
 #endif
 
index 8956a9856604e72634dd11ef416ec9788660bc37..491be4179ddd989fed6cba1e63963bf47653f4ba 100644 (file)
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
        return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+       pr_info("rfi-flush: disabled on command line.");
+       no_rfi_flush = true;
+       return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see documentation that
+ * says to use nopti, we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+       pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+       handle_no_rfi_flush(NULL);
+       return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+       /*
+        * We don't need to do the flush explicitly, just enter+exit kernel is
+        * sufficient, the RFI exit handlers will do the right thing.
+        */
+}
+
+void rfi_flush_enable(bool enable)
+{
+       if (rfi_flush == enable)
+               return;
+
+       if (enable) {
+               do_rfi_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else
+               do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+       rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+       u64 l1d_size, limit;
+       int cpu;
+
+       l1d_size = ppc64_caches.l1d.size;
+       limit = min(safe_stack_limit(), ppc64_rma_size);
+
+       /*
+        * Align to L1d size, and size it at 2x L1d size, to catch possible
+        * hardware prefetch runoff. We don't have a recipe for load patterns to
+        * reliably avoid the prefetcher.
+        */
+       l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+       memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+       for_each_possible_cpu(cpu) {
+               /*
+                * The fallback flush is currently coded for 8-way
+                * associativity. Different associativity is possible, but it
+                * will be treated as 8-way and may not evict the lines as
+                * effectively.
+                *
+                * 128 byte lines are mandatory.
+                */
+               u64 c = l1d_size / 8;
+
+               paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca[cpu].l1d_flush_congruence = c;
+               paca[cpu].l1d_flush_sets = c / 128;
+       }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+       if (types & L1D_FLUSH_FALLBACK) {
+               pr_info("rfi-flush: Using fallback displacement flush\n");
+               init_fallback_flush();
+       }
+
+       if (types & L1D_FLUSH_ORI)
+               pr_info("rfi-flush: Using ori type flush\n");
+
+       if (types & L1D_FLUSH_MTTRIG)
+               pr_info("rfi-flush: Using mttrig type flush\n");
+
+       enabled_flush_types = types;
+
+       if (!no_rfi_flush)
+               rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
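
To make the geometry concrete, here is a user-space model of the
fallback displacement flush configured above (the L1D size is an
assumption; the real flush is the assembly loop in exceptions-64s.S,
and this only shows which addresses that loop walks, ignoring the
staggered-offset tweak the assembly applies):

    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
            const size_t l1d_size = 32 * 1024;           /* assumed 32K L1D */
            const size_t congruence = l1d_size / 8;      /* as in init_fallback_flush() */
            const size_t sets = congruence / 128;        /* 128-byte cache lines */
            volatile char *area = malloc(l1d_size * 2);  /* 2x to catch prefetch runoff */
            volatile char sink = 0;

            if (!area)
                    return 1;
            memset((void *)area, 0, l1d_size * 2);
            for (size_t set = 0; set < sets; set++)
                    for (int way = 0; way < 8; way++)    /* 8-way set associative */
                            sink += area[set * 128 + way * congruence];
            (void)sink;
            free((void *)area);
            return 0;
    }
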
index 0494e1566ee2ab9b976cb0d9edb8c195a4490a23..307843d23682a79f0e4d9ae0cdb86a219e0b6e6a 100644 (file)
@@ -132,6 +132,15 @@ SECTIONS
        /* Read-only data */
        RO_DATA(PAGE_SIZE)
 
+#ifdef CONFIG_PPC64
+       . = ALIGN(8);
+       __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+               __start___rfi_flush_fixup = .;
+               *(__rfi_flush_fixup)
+               __stop___rfi_flush_fixup = .;
+       }
+#endif
+
        EXCEPTION_TABLE(0)
 
        NOTES :kernel :notes
index 29ebe2fd58674c59f27803a5426e4d2414f39418..a93d719edc906887b0f4bf412b2b76ac04babfec 100644 (file)
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                gpte->may_read = true;
                gpte->may_write = true;
                gpte->page_size = MMU_PAGE_4K;
+               gpte->wimg = HPTE_R_M;
 
                return 0;
        }
index 966097232d2147bbcd79df41354a5f973fb9b7c1..b73dbc9e797da76bf4305a73972c4af925fbd70d 100644 (file)
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
        u32 order;
 
        /* These fields protected by kvm->lock */
+
+       /* Possible values and their usage:
+        *  <0     an error occurred during allocation,
+        *  -EBUSY allocation is in progress,
+        *  0      allocation completed successfully.
+        */
        int error;
-       bool prepare_done;
 
-       /* Private to the work thread, until prepare_done is true,
-        * then protected by kvm->resize_hpt_sem */
+       /* Private to the work thread, until error != -EBUSY,
+        * then protected by kvm->lock.
+        */
        struct kvm_hpt_info hpt;
 };
 
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
                 * Reset all the reverse-mapping chains for all memslots
                 */
                kvmppc_rmap_reset(kvm);
-               /* Ensure that each vcpu will flush its TLB on next entry. */
-               cpumask_setall(&kvm->arch.need_tlb_flush);
                err = 0;
                goto out;
        }
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
        kvmppc_set_hpt(kvm, &info);
 
 out:
+       if (err == 0)
+               /* Ensure that each vcpu will flush its TLB on next entry. */
+               cpumask_setall(&kvm->arch.need_tlb_flush);
+
        mutex_unlock(&kvm->lock);
        return err;
 }
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-       BUG_ON(kvm->arch.resize_hpt != resize);
+       if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+               return;
 
        if (!resize)
                return;
 
-       if (resize->hpt.virt)
-               kvmppc_free_hpt(&resize->hpt);
+       if (resize->error != -EBUSY) {
+               if (resize->hpt.virt)
+                       kvmppc_free_hpt(&resize->hpt);
+               kfree(resize);
+       }
 
-       kvm->arch.resize_hpt = NULL;
-       kfree(resize);
+       if (kvm->arch.resize_hpt == resize)
+               kvm->arch.resize_hpt = NULL;
 }
 
 static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
                                                     struct kvm_resize_hpt,
                                                     work);
        struct kvm *kvm = resize->kvm;
-       int err;
+       int err = 0;
 
-       resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
-                        resize->order);
-
-       err = resize_hpt_allocate(resize);
+       if (WARN_ON(resize->error != -EBUSY))
+               return;
 
        mutex_lock(&kvm->lock);
 
+       /* Request is still current? */
+       if (kvm->arch.resize_hpt == resize) {
+               /* Large allocations below may sleep for a long
+                * time: do not hold kvm->lock across them.
+                */
+               mutex_unlock(&kvm->lock);
+
+               resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+                                resize->order);
+
+               err = resize_hpt_allocate(resize);
+
+               /* -EBUSY is reserved to mean "allocation still in
+                * progress" while preparing the HPT resize, so it
+                * must never escape from the allocator itself.
+                */
+               if (WARN_ON(err == -EBUSY))
+                       err = -EINPROGRESS;
+
+               mutex_lock(&kvm->lock);
+               /* It is possible that kvm->arch.resize_hpt != resize
+                * after we grab kvm->lock again.
+                */
+       }
+
        resize->error = err;
-       resize->prepare_done = true;
+
+       if (kvm->arch.resize_hpt != resize)
+               resize_hpt_release(kvm, resize);
 
        mutex_unlock(&kvm->lock);
 }
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
        if (resize) {
                if (resize->order == shift) {
-                       /* Suitable resize in progress */
-                       if (resize->prepare_done) {
-                               ret = resize->error;
-                               if (ret != 0)
-                                       resize_hpt_release(kvm, resize);
-                       } else {
+                       /* Suitable resize in progress? */
+                       ret = resize->error;
+                       if (ret == -EBUSY)
                                ret = 100; /* estimated time in ms */
-                       }
+                       else if (ret)
+                               resize_hpt_release(kvm, resize);
 
                        goto out;
                }
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
                ret = -ENOMEM;
                goto out;
        }
+
+       resize->error = -EBUSY;
        resize->order = shift;
        resize->kvm = kvm;
        INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
        if (!resize || (resize->order != shift))
                goto out;
 
-       ret = -EBUSY;
-       if (!resize->prepare_done)
-               goto out;
-
        ret = resize->error;
-       if (ret != 0)
+       if (ret)
                goto out;
 
        ret = resize_hpt_rehash(resize);
-       if (ret != 0)
+       if (ret)
                goto out;
 
        resize_hpt_pivot(resize);
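
After the changes above, resize->error alone encodes the prepare state
machine that both ioctls consult. A minimal sketch of the mapping the
prepare ioctl applies (the helper below is illustrative, not code from
the patch):

	static long resize_hpt_status(int error)
	{
		if (error == -EBUSY)
			return 100;	/* still allocating: estimate in ms */
		if (error)
			return error;	/* allocation failed, resize released */
		return 0;		/* HPT prepared, ready for commit */
	}
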
index 2659844784b817d7ca77e6e95ecb9418f430e62d..9c61f736c75b2d0761ec4f9d2e385df75b6dc882 100644 (file)
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
        mtmsrd  r0,1            /* clear RI in MSR */
        mtsrr0  r5
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtmsrd  r6, 1                   /* Clear RI in MSR */
        mtsrr0  r8
        mtsrr1  r7
-       RFI
+       RFI_TO_KERNEL
 
        /* Virtual-mode return */
 .Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
        ld      r0, VCPU_GPR(R0)(r4)
        ld      r4, VCPU_GPR(R4)(r4)
-
-       hrfid
+       HRFI_TO_GUEST
        b       .
 
 secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
        ld      r4, PACAKMSR(r13)
        mtspr   SPRN_SRR0, r3
        mtspr   SPRN_SRR1, r4
-       rfid
+       RFI_TO_KERNEL
 9:     addi    r3, r1, STACK_FRAME_OVERHEAD
        bl      kvmppc_bad_interrupt
        b       9b
index d0dc8624198f8e4663800c4ca603aea98d4ba128..7deaeeb14b9358d8a4e6f1107cd42bba0605a264 100644 (file)
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
 #endif
 
 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
                pte.page_size = MMU_PAGE_64K;
+               pte.wimg = HPTE_R_M;
        }
 
        switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
index 42a4b237df5f5731a4f6964feafc4db2835ffdee..34a5adeff0840662e8eae4553b008069bfd9426d 100644 (file)
@@ -46,6 +46,9 @@
 
 #define FUNC(name)             name
 
+#define RFI_TO_KERNEL  RFI
+#define RFI_TO_GUEST   RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
        GET_SCRATCH0(r13)
 
        /* And get back into the code */
-       RFI
+       RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
        ori     r5, r5, MSR_EE
        mtsrr0  r7
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 #include "book3s_segment.S"
index 2a2b96d5399917398312dacb7c7b2846483a7fd4..93a180ceefad03343d1f9064420ee00ca54973d7 100644 (file)
@@ -156,7 +156,7 @@ no_dcbz32_on:
        PPC_LL  r9, SVCPU_R9(r3)
        PPC_LL  r3, (SVCPU_R3)(r3)
 
-       RFI
+       RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
        cmpwi   r12, BOOK3S_INTERRUPT_DOORBELL
        beqa    BOOK3S_INTERRUPT_DOORBELL
 
-       RFI
+       RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:
index 41cf5ae273cf74a2747d13b9da2274b8eb2054cb..a95ea007d654d5db2b78d811a4ef21a750a95609 100644 (file)
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
        }
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___rfi_flush_fixup);
+       end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       if (types & L1D_FLUSH_FALLBACK)
+               /* b .+16 to fallback flush */
+               instrs[0] = 0x48000010;
+
+       i = 0;
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       for (i = 0; start < end; start++, i++) {
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+               patch_instruction(dest + 1, instrs[1]);
+               patch_instruction(dest + 2, instrs[2]);
+       }
+
+       printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
        long *start, *end;
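
The dest = (void *)start + *start arithmetic above works because each
__rfi_flush_fixup entry is self-relative: the 8-byte entry stores the
distance from its own address to the three-instruction patch site, so
the table needs no relocation. A hedged sketch of the decode step in
isolation:

	/* Each entry stores (site - &entry); recover the site with
	 * plain pointer arithmetic, valid wherever the kernel loads.
	 */
	static unsigned int *rfi_fixup_site(long *entry)
	{
		return (unsigned int *)((void *)entry + *entry);
	}
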
index 1edfbc1e40f4387b30dd5c0a9491935460feaf92..4fb21e17504aad72295a9e1cdffe54c5547379bd 100644 (file)
 #include <asm/kexec.h>
 #include <asm/smp.h>
 #include <asm/tm.h>
+#include <asm/setup.h>
 
 #include "powernv.h"
 
+static void pnv_setup_rfi_flush(void)
+{
+       struct device_node *np, *fw_features;
+       enum l1d_flush_type type;
+       int enable;
+
+       /* Default to fallback in case fw-features are not available */
+       type = L1D_FLUSH_FALLBACK;
+       enable = 1;
+
+       np = of_find_node_by_name(NULL, "ibm,opal");
+       fw_features = of_get_child_by_name(np, "fw-features");
+       of_node_put(np);
+
+       if (fw_features) {
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_MTTRIG;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_ORI;
+
+               of_node_put(np);
+
+               /* Enable unless firmware says NOT to */
+               enable = 2;
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+               of_node_put(fw_features);
+       }
+
+       setup_rfi_flush(type, enable > 0);
+}
+
 static void __init pnv_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
+       pnv_setup_rfi_flush();
+
        /* Initialize SMP */
        pnv_smp_init();
 
index 6e35780c5962f25144b0181414c03a017840478f..a0b20c03f078cc41b9ad126fbf2603a4ffb68463 100644 (file)
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static CLASS_ATTR_RW(dlpar);
 
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
 {
+       if (pseries_hp_wq)
+               return 0;
+
        pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
-                                       WQ_UNBOUND, 1);
+                       WQ_UNBOUND, 1);
+
+       return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+       int rc;
+
+       rc = dlpar_workqueue_init();
+       if (rc)
+               return rc;
+
        return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
 
index 4470a3194311e06e040c624b8b1491eb0e071298..1ae1d9f4dbe99935130971cdc390d8f581ad788e 100644 (file)
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
        return CMO_PageSize;
 }
 
+int dlpar_workqueue_init(void);
+
 #endif /* _PSERIES_PSERIES_H */
index 4923ffe230cf926565c0ed4c9a339a822b073c15..81d8614e73790b1923a3c8cfd336a2531fee76ce 100644 (file)
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
        /* Hotplug Events */
        np = of_find_node_by_path("/event-sources/hot-plug-events");
        if (np != NULL) {
-               request_event_sources_irqs(np, ras_hotplug_interrupt,
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
                                           "RAS_HOTPLUG");
                of_node_put(np);
        }
index a8531e01265842f39e759bc59aa67f3afd461486..ae4f596273b51a836e5d27307f81bfe6438590bf 100644 (file)
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
        of_pci_check_probe_only();
 }
 
+static void pseries_setup_rfi_flush(void)
+{
+       struct h_cpu_char_result result;
+       enum l1d_flush_type types;
+       bool enable;
+       long rc;
+
+       /* Enable by default */
+       enable = true;
+
+       rc = plpar_get_cpu_characteristics(&result);
+       if (rc == H_SUCCESS) {
+               types = L1D_FLUSH_NONE;
+
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+                       types |= L1D_FLUSH_MTTRIG;
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+                       types |= L1D_FLUSH_ORI;
+
+               /* Use fallback if nothing set in hcall */
+               if (types == L1D_FLUSH_NONE)
+                       types = L1D_FLUSH_FALLBACK;
+
+               if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+                       enable = false;
+       } else {
+               /* Default to fallback in case the hcall is not available */
+               types = L1D_FLUSH_FALLBACK;
+       }
+
+       setup_rfi_flush(types, enable);
+}
+
 static void __init pSeries_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
        fwnmi_init();
 
+       pseries_setup_rfi_flush();
+
        /* By default, only probe PCI (can be overridden by rtas_pci) */
        pci_add_flags(PCI_PROBE_ONLY);
 
index d4fc98c50378c40bc901f6446d2bfff68151eb6a..20da391b5f329885e26b30e0351d73d571d28fc1 100644 (file)
@@ -55,7 +55,6 @@ config X86
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
        select ARCH_HAS_PMEM_API                if X86_64
-       # Causing hangs/crashes, see the commit that added this change for details.
        select ARCH_HAS_REFCOUNT
        select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
        select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
        select GENERIC_CLOCKEVENTS_MIN_ADJUST
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
        select GENERIC_FIND_FIRST_BIT
        select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
+config RETPOLINE
+       bool "Avoid speculative indirect branches in kernel"
+       default y
+       help
+         Compile kernel with the retpoline compiler options to guard against
+         kernel-to-user data leaks by avoiding speculative indirect
+         branches. Requires a compiler with -mindirect-branch=thunk-extern
+         support for full protection. The kernel may run slower.
+
+         Without compiler support, at least indirect branches in assembler
+         code are eliminated. Since this includes the syscall entry path,
+         it is not entirely pointless.
+
 config INTEL_RDT
        bool "Intel Resource Director Technology support"
        default n
index 3e73bc255e4eb3edda965a6bc493e0e6ab8de354..fad55160dcb94a28e60d537d3d69d471a1e10e2e 100644 (file)
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
        $(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
index 16627fec80b26b211ba3c4b50c3850c134e2bd62..3d09e3aca18dad33ba0c27c3673e49dcbfac0ce8 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
        pxor INC, STATE4
        movdqu IV, 0x30(OUTP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x00(OUTP), INC
        pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
        _aesni_gf128mul_x_ble()
        movups IV, (IVP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x40(OUTP), INC
        pxor INC, STATE1
index f7c495e2863cb0daecb7b023e19e393cfc32c587..a14af6eb09cb074a1dcf42d0adaaf1c388ceb30b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
        vpxor 14 * 16(%rax), %xmm15, %xmm14;
        vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 16), %rsp;
 
index eee5b3982cfd3c6838a9a34534409df8935eba17..b66bbfa62f50d7c05ddb29b57a322979f64a6999 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
        vpxor 14 * 32(%rax), %ymm15, %ymm14;
        vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 32), %rsp;
 
index 7a7de27c6f415206f2c3b8e5a8a43d4468bc7aaa..d9b734d0c8cc78ecdc3293ca117546eb538a12fc 100644 (file)
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
        movzxw  (bufp, %rax, 2), len
        lea     crc_array(%rip), bufp
        lea     (bufp, len, 1), bufp
-       jmp     *bufp
+       JMP_NOSPEC bufp
 
        ################################################################
        ## 2a) PROCESS FULL BLOCKS:
index 45a63e00a6af9a12b4739246d6844ba94f766e71..3f48f695d5e6ac6546a009c734fcac517564b24d 100644 (file)
@@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
  * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
  * halves:
  */
-#define PTI_SWITCH_PGTABLES_MASK       (1<<PAGE_SHIFT)
-#define PTI_SWITCH_MASK                (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+#define PTI_USER_PGTABLE_BIT           PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK          (1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT              X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK             (1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
 
 .macro SET_NOFLUSH_BIT reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
@@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
-       andq    $(~PTI_SWITCH_MASK), \reg
+       andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
 .endm
 
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
@@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
-       jmp     .Lwrcr3_\@
+       jmp     .Lwrcr3_pcid_\@
 
 .Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg
 
+.Lwrcr3_pcid_\@:
+       /* Flip the ASID to the user version */
+       orq     $(PTI_USER_PCID_MASK), \scratch_reg
+
 .Lwrcr3_\@:
-       /* Flip the PGD and ASID to the user version */
-       orq     $(PTI_SWITCH_MASK), \scratch_reg
+       /* Flip the PGD to the user version */
+       orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
 .Lend_\@:
 .endm
@@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
-        * Is the "switch mask" all zero?  That means that both of
-        * these are zero:
-        *
-        *      1. The user/kernel PCID bit, and
-        *      2. The user/kernel "bit" that points CR3 to the
-        *         bottom half of the 8k PGD
-        *
-        * That indicates a kernel CR3 value, not a user CR3.
+        * Test the user pagetable bit. If set, then the user page tables
+        * are active. If clear, CR3 already has the kernel page table
+        * active.
         */
-       testq   $(PTI_SWITCH_MASK), \scratch_reg
-       jz      .Ldone_\@
+       bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
+       jnc     .Ldone_\@
 
        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3
@@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
         * KERNEL pages can always resume with NOFLUSH as we do
         * explicit flushes.
         */
-       bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
+       bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lnoflush_\@
 
        /*
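
The renamed masks make the user-CR3 layout explicit: bit 11
(X86_CR3_PTI_PCID_USER_BIT) selects the user ASID and bit 12
(PAGE_SHIFT with 4K pages) selects the user half of the 8k PGD. A
small sketch under those assumptions:

	/* Building a user CR3 from a kernel CR3 sets both bits, which
	 * is what the two orq steps above amount to on the PCID path.
	 */
	unsigned long user_cr3 = kernel_cr3 | PTI_USER_PGTABLE_AND_PCID_MASK;
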
index ace8f321a5a1f2d1331cc4331a1922c9ed3d8bc1..a1f28a54f23a42e85436ed607293bea14486391c 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
        .section .entry.text, "ax"
 
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
        /* kernel thread */
 1:     movl    %edi, %eax
-       call    *%ebx
+       CALL_NOSPEC %ebx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
        movl    %ecx, %es
        TRACE_IRQS_OFF
        movl    %esp, %eax                      # pt_regs pointer
-       call    *%edi
+       CALL_NOSPEC %edi
        jmp     ret_from_exception
 END(common_exception)
 
index f048e384ff54e06530b657efc3b00cdf50f1ce5b..4f8e1d35a97ca7071a11f33f07e2b87c2eac7acf 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -191,7 +192,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
         */
        pushq   %rdi
        movq    $entry_SYSCALL_64_stage2, %rdi
-       jmp     *%rdi
+       JMP_NOSPEC %rdi
 END(entry_SYSCALL_64_trampoline)
 
        .popsection
@@ -270,7 +271,12 @@ entry_SYSCALL_64_fastpath:
         * It might end up jumping to the slow path.  If it jumps, RAX
         * and all argument registers are clobbered.
         */
+#ifdef CONFIG_RETPOLINE
+       movq    sys_call_table(, %rax, 8), %rax
+       call    __x86_indirect_thunk_rax
+#else
        call    *sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
        movq    %rax, RAX(%rsp)
@@ -442,7 +448,7 @@ ENTRY(stub_ptregs_64)
        jmp     entry_SYSCALL64_slow_path
 
 1:
-       jmp     *%rax                           /* Called from C */
+       JMP_NOSPEC %rax                         /* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -521,7 +527,7 @@ ENTRY(ret_from_fork)
 1:
        /* kernel thread */
        movq    %r12, %rdi
-       call    *%rbx
+       CALL_NOSPEC %rbx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
index 141e07b0621689e745582599c009f4af1d053c6c..24ffa1e88cf948ecbe9839f6d956c69a12ecd4f0 100644 (file)
@@ -582,6 +582,24 @@ static __init int bts_init(void)
        if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
                return -ENODEV;
 
+       if (boot_cpu_has(X86_FEATURE_PTI)) {
+               /*
+                * BTS hardware writes through a virtual memory map, so we
+                * must use either the kernel physical map or the user
+                * mapping of the AUX buffer.
+                *
+                * However, since this driver supports per-CPU and per-task
+                * inherit, we cannot use the user mapping: it will not be
+                * available if we're not running the owning process.
+                *
+                * With PTI we can't use the kernel map either, because it's
+                * not there when we run userspace.
+                *
+                * For now, disable this driver when using PTI.
+                */
+               return -ENODEV;
+       }
+
        bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
                                  PERF_PMU_CAP_EXCLUSIVE;
        bts_pmu.task_ctx_nr     = perf_sw_context;
index ff700d81e91efcf89a2f2ac725b3e80d868c8621..0927cdc4f9460165a9159b1c4f7bea0e759bdbb9 100644 (file)
 #include <asm/pgtable.h>
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
+#include <asm/asm.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 #endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
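
These prototypes exist because -mindirect-branch=thunk-extern makes the
compiler rewrite every indirect branch into a call or jump to an
external per-register thunk, and those symbols then need declarations
for the export and symbol-versioning machinery. Roughly (a sketch of
the compiler's transformation, not code from the patch):

	/* Source as written: */
	void run(void (*fn)(void))
	{
		fn();	/* with thunk-extern this becomes something like
			 * "jmp __x86_indirect_thunk_rax"; the register
			 * depends on the compiler's allocation choice. */
	}
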
index 21ac898df2d8e478b6e214f3e816fde937ecfd03..f275447862f4583909d14b6e2469c16e4a81b951 100644 (file)
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
 #define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE          ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD      ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN         ( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT           ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW      ( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_BUG_MONITOR                        X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400               X86_BUG(13) /* CPU is among the affected by Erratum 400 */
 #define X86_BUG_CPU_MELTDOWN           X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1             X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2             X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
index 5400add2885b9646fa7d8d53c5b35e16c2794c7d..8bf450b13d9f53883db9ee89ac6d39913963907e 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/nmi.h>
 #include <asm/io.h>
 #include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
                return U64_MAX;
 
        __asm__ __volatile__("mov %4, %%r8\n"
-                            "call *%5"
+                            CALL_NOSPEC
                             : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                               "+c" (control), "+d" (input_address)
-                            :  "r" (output_address), "m" (hv_hypercall_pg)
+                            :  "r" (output_address),
+                               THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory", "r8", "r9", "r10", "r11");
 #else
        u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        if (!hv_hypercall_pg)
                return U64_MAX;
 
-       __asm__ __volatile__("call *%7"
+       __asm__ __volatile__(CALL_NOSPEC
                             : "=A" (hv_status),
                               "+c" (input_address_lo), ASM_CALL_CONSTRAINT
                             : "A" (control),
                               "b" (input_address_hi),
                               "D"(output_address_hi), "S"(output_address_lo),
-                              "m" (hv_hypercall_pg)
+                              THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory");
 #endif /* !x86_64 */
        return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
 #ifdef CONFIG_X86_64
        {
-               __asm__ __volatile__("call *%4"
+               __asm__ __volatile__(CALL_NOSPEC
                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                       "+c" (control), "+d" (input1)
-                                    : "m" (hv_hypercall_pg)
+                                    : THUNK_TARGET(hv_hypercall_pg)
                                     : "cc", "r8", "r9", "r10", "r11");
        }
 #else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
                u32 input1_hi = upper_32_bits(input1);
                u32 input1_lo = lower_32_bits(input1);
 
-               __asm__ __volatile__ ("call *%5"
+               __asm__ __volatile__ (CALL_NOSPEC
                                      : "=A"(hv_status),
                                        "+c"(input1_lo),
                                        ASM_CALL_CONSTRAINT
                                      : "A" (control),
                                        "b" (input1_hi),
-                                       "m" (hv_hypercall_pg)
+                                       THUNK_TARGET(hv_hypercall_pg)
                                      : "cc", "edi", "esi");
        }
 #endif
index 34c4922bbc3fb5ed95cb0819d7ac6b9cd37a7f41..e7b983a355060a6c5b5db76f1fc18475eb647ae5 100644 (file)
 #define FAM10H_MMIO_CONF_BASE_MASK     0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT    20
 #define MSR_FAM10H_NODE_ID             0xc001100c
+#define MSR_F10H_DECFG                 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT    1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE                BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
 
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1                        0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644 (file)
index 0000000..402a11c
--- /dev/null
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version: two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
+       mov     $(nr/2), reg;                   \
+771:                                           \
+       call    772f;                           \
+773:   /* speculation trap */                  \
+       pause;                                  \
+       jmp     773b;                           \
+772:                                           \
+       call    774f;                           \
+775:   /* speculation trap */                  \
+       pause;                                  \
+       jmp     775b;                           \
+774:                                           \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative.  It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+       .Lannotate_\@:
+       .pushsection .discard.nospec
+       .long .Lannotate_\@ - .
+       .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+       call    .Ldo_rop_\@
+.Lspec_trap_\@:
+       pause
+       jmp     .Lspec_trap_\@
+.Ldo_rop_\@:
+       mov     \reg, (%_ASM_SP)
+       ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+       jmp     .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+       RETPOLINE_JMP \reg
+.Ldo_call_\@:
+       call    .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+               __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       jmp     *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(call *\reg),                          \
+               __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       call    *\reg
+#endif
+.endm
+
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE                            \
+       "999:\n\t"                                              \
+       ".pushsection .discard.nospec\n\t"                      \
+       ".long 999b - .\n\t"                                    \
+       ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier, which only newer GCC
+ * supports, the 64-bit variant depends on RETPOLINE (the define set by
+ * the compiler flags) rather than CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC                                           \
+       ANNOTATE_NOSPEC_ALTERNATIVE                             \
+       ALTERNATIVE(                                            \
+       "call *%[thunk_target]\n",                              \
+       "call __x86_indirect_thunk_%V[thunk_target]\n",         \
+       X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",    \
+       "       jmp    904f;\n"                                 \
+       "       .align 16\n"                                    \
+       "901:   call   903f;\n"                                 \
+       "902:   pause;\n"                                       \
+       "       jmp    902b;\n"                                 \
+       "       .align 16\n"                                    \
+       "903:   addl   $4, %%esp;\n"                            \
+       "       pushl  %[thunk_target];\n"                      \
+       "       ret;\n"                                         \
+       "       .align 16\n"                                    \
+       "904:   call   901b;\n",                                \
+       X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+       SPECTRE_V2_NONE,
+       SPECTRE_V2_RETPOLINE_MINIMAL,
+       SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+       SPECTRE_V2_RETPOLINE_GENERIC,
+       SPECTRE_V2_RETPOLINE_AMD,
+       SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+       unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=&r" (loops), ASM_CALL_CONSTRAINT
+                     : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
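
From C code, the CALL_NOSPEC/THUNK_TARGET pair replaces a bare
"call *%[target]", as the Hyper-V hypercall conversion earlier in this
series shows. A minimal sketch (illustrative only: a real call site
must also declare the clobbers its calling convention implies):

	static inline unsigned long call_no_spec(unsigned long (*fn)(void))
	{
		unsigned long ret;

		asm volatile(CALL_NOSPEC
			     : "=a" (ret), ASM_CALL_CONSTRAINT
			     : THUNK_TARGET(fn)
			     : "memory");
		return ret;
	}
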
index 7a5d6695abd37e737bfb0e532ec4d86c53256433..eb66fa9cd0fc6187d55dc0c1a8fb30e5e32687a8 100644 (file)
@@ -38,6 +38,7 @@ do {                                          \
 #define PCI_NOASSIGN_ROMS      0x80000
 #define PCI_ROOT_NO_CRS                0x100000
 #define PCI_NOASSIGN_BARS      0x200000
+#define PCI_BIG_ROOT_WINDOW    0x400000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
index 6a60fea90b9d9dd669033e4d7eee627b99a0c3d5..625a52a5594f53b3ddc889843b92d228cc39a8e8 100644 (file)
@@ -40,7 +40,7 @@
 #define CR3_NOFLUSH    BIT_ULL(63)
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define X86_CR3_PTI_SWITCH_BIT        11
+# define X86_CR3_PTI_PCID_USER_BIT     11
 #endif
 
 #else
index 4a08dd2ab32ade77dcbfff6a8e8b77f5d5e43764..d33e4a26dc7ed2db51687ddb4ca7ee3a35ddf835 100644 (file)
@@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
         * Make sure that the dynamic ASID space does not conflict with the
         * bit we are using to switch between user and kernel ASIDs.
         */
-       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
 
        /*
         * The ASID being passed in here should have respected the
         * MAX_ASID_AVAILABLE and thus never have the switch bit set.
         */
-       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
 #endif
        /*
         * The dynamically-assigned ASIDs that get passed in are small
@@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
 {
        u16 ret = kern_pcid(asid);
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-       ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+       ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
 #endif
        return ret;
 }
index 7cb282e9e58777aeb2ffd86b344d39a5189c5f84..bfd882617613923f0db79c4aae82690bda7a8d4f 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
        __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
        stac();
-       asm volatile("call *%[call]"
+       asm volatile(CALL_NOSPEC
                     : __HYPERCALL_5PARAM
-                    : [call] "a" (&hypercall_page[call])
+                    : [thunk_target] "a" (&hypercall_page[call])
                     : __HYPERCALL_CLOBBER5);
        clac();
 
index dbaf14d69ebd510b86a4a0e9e83bbd1862d99d14..4817d743c26359c697559053a23518a5e25b241b 100644 (file)
@@ -344,9 +344,12 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
        unsigned long flags;
+       int i;
 
-       if (instr[0] != 0x90)
-               return;
+       for (i = 0; i < a->padlen; i++) {
+               if (instr[i] != 0x90)
+                       return;
+       }
 
        local_irq_save(flags);
        add_nops(instr + (a->instrlen - a->padlen), a->padlen);
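
The optimize_nops() change matters when alternative padding is not
uniformly NOPs: checking only the first byte could mistake live
instruction bytes for padding and rewrite them. A hedged illustration
with made-up bytes:

	/* Hypothetical padding of padlen 3: the old instr[0]-only test
	 * passes (0x90 is NOP) although bytes 1..2 are not padding;
	 * the new loop bails out instead of rewriting them.
	 */
	u8 pad[3] = { 0x90, 0x0f, 0x1f };
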
index bcb75dc97d44075d2eecb3137b91f934072352b0..ea831c85819583c3dd0b0a48c2a31518eaa67b44 100644 (file)
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
                set_cpu_cap(c, X86_FEATURE_K8);
 
        if (cpu_has(c, X86_FEATURE_XMM2)) {
-               /* MFENCE stops RDTSC speculation */
-               set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               unsigned long long val;
+               int ret;
+
+               /*
+                * A serializing LFENCE has less overhead than MFENCE, so
+                * use it for execution serialization.  On families which
+                * don't have that MSR, LFENCE is already serializing.
+                * msr_set_bit() uses the fault-safe accessors, so the
+                * write is harmless even if the MSR is not present.
+                */
+               msr_set_bit(MSR_F10H_DECFG,
+                           MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+               /*
+                * Verify that the MSR write was successful (could be running
+                * under a hypervisor) and only then assume that LFENCE is
+                * serializing.
+                */
+               ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+               if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+                       /* A serializing LFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+               } else {
+                       /* MFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               }
        }
 
        /*
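
The DECFG handling above is a general verify-after-write pattern for
MSRs that a hypervisor may silently ignore. Distilled into a helper
(the name msr_bit_sticks is hypothetical; msr_set_bit() and
rdmsrl_safe() are the kernel helpers used in the hunk):

	static bool msr_bit_sticks(u32 msr, int bit)
	{
		u64 val;

		msr_set_bit(msr, bit);		/* fault-safe write */
		return !rdmsrl_safe(msr, &val) && (val & BIT_ULL(bit));
	}
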
index ba0b2424c9b050100ac6eeef97c4aef45181d455..e4dc26185aa70bcdecf8ca932b6c720121819558 100644 (file)
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 
+static void __init spectre_v2_select_mitigation(void);
+
 void __init check_bugs(void)
 {
        identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
                print_cpu_info(&boot_cpu_data);
        }
 
+       /* Select the proper spectre mitigation before patching alternatives */
+       spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
        /*
         * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
                set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+       SPECTRE_V2_CMD_NONE,
+       SPECTRE_V2_CMD_AUTO,
+       SPECTRE_V2_CMD_FORCE,
+       SPECTRE_V2_CMD_RETPOLINE,
+       SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+       SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+       [SPECTRE_V2_NONE]                       = "Vulnerable",
+       [SPECTRE_V2_RETPOLINE_MINIMAL]          = "Vulnerable: Minimal generic ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_MINIMAL_AMD]      = "Vulnerable: Minimal AMD ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_GENERIC]          = "Mitigation: Full generic retpoline",
+       [SPECTRE_V2_RETPOLINE_AMD]              = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+       if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+       return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+       int len = strlen(opt);
+
+       return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+       char arg[20];
+       int ret;
+
+       ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+                                 sizeof(arg));
+       if (ret > 0)  {
+               if (match_option(arg, ret, "off")) {
+                       goto disable;
+               } else if (match_option(arg, ret, "on")) {
+                       spec2_print_if_secure("force enabled on command line.");
+                       return SPECTRE_V2_CMD_FORCE;
+               } else if (match_option(arg, ret, "retpoline")) {
+                       spec2_print_if_insecure("retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE;
+               } else if (match_option(arg, ret, "retpoline,amd")) {
+                       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+                               pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+                               return SPECTRE_V2_CMD_AUTO;
+                       }
+                       spec2_print_if_insecure("AMD retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_AMD;
+               } else if (match_option(arg, ret, "retpoline,generic")) {
+                       spec2_print_if_insecure("generic retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+               } else if (match_option(arg, ret, "auto")) {
+                       return SPECTRE_V2_CMD_AUTO;
+               }
+       }
+
+       if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+               return SPECTRE_V2_CMD_AUTO;
+disable:
+       spec2_print_if_insecure("disabled on command line.");
+       return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+       enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+       /*
+        * If the CPU is not affected and the command line mode is NONE or AUTO
+        * then nothing to do.
+        */
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+           (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+               return;
+
+       switch (cmd) {
+       case SPECTRE_V2_CMD_NONE:
+               return;
+
+       case SPECTRE_V2_CMD_FORCE:
+               /* FALLTHRU */
+       case SPECTRE_V2_CMD_AUTO:
+               goto retpoline_auto;
+
+       case SPECTRE_V2_CMD_RETPOLINE_AMD:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_amd;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_generic;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_auto;
+               break;
+       }
+       pr_err("kernel not compiled with retpoline; no mitigation available!");
+       return;
+
+retpoline_auto:
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+       retpoline_amd:
+               if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       goto retpoline_generic;
+               }
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       } else {
+       retpoline_generic:
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       }
+
+       spectre_v2_enabled = mode;
+       pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               return sprintf(buf, "Not affected\n");
+       if (boot_cpu_has(X86_FEATURE_PTI))
+               return sprintf(buf, "Mitigation: PTI\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+               return sprintf(buf, "Not affected\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               return sprintf(buf, "Not affected\n");
+
+       return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
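
With GENERIC_CPU_VULNERABILITIES selected (see the Kconfig hunk
earlier), these handlers back the new files under
/sys/devices/system/cpu/vulnerabilities/. On an affected CPU with PTI
enabled and a full compiler retpoline, a read would plausibly return
(illustrative; the strings are taken verbatim from the handlers and
table above):

	$ cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
	Mitigation: Full generic retpoline
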
index 39d7ea865207d102d2f03fc9ca779b2bdbb03c61..ef29ad001991d6acdd5b59cd707d85f9ff99f9ea 100644 (file)
@@ -926,6 +926,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
        if (c->x86_vendor != X86_VENDOR_AMD)
                setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
        fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
index 8ccdca6d3f9e9b876ee27f021ed8c021b1168220..d9e460fc7a3b309327c8f2899cc54a7be2b9dc6b 100644 (file)
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-       if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-               pr_err_once("late loading on model 79 is disabled.\n");
+       /*
+        * Late loading on model 79 with microcode revision less than 0x0b000021
+        * may result in a system hang. This behavior is documented in item
+        * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+        */
+       if (c->x86 == 6 &&
+           c->x86_model == INTEL_FAM6_BROADWELL_X &&
+           c->x86_mask == 0x01 &&
+           c->microcode < 0x0b000021) {
+               pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+               pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
                return true;
        }
 
index b6c6468e10bc96625c25c4ae8d19b734af81366b..4c8440de33559942a98245cf123a5ed10a30e406 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/export.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CC_USING_FENTRY
 # define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
        movl    0x4(%ebp), %edx
        subl    $MCOUNT_INSN_SIZE, %eax
 
-       call    *ftrace_trace_function
+       movl    ftrace_trace_function, %ecx
+       CALL_NOSPEC %ecx
 
        popl    %edx
        popl    %ecx
@@ -241,5 +243,5 @@ return_to_handler:
        movl    %eax, %ecx
        popl    %edx
        popl    %eax
-       jmp     *%ecx
+       JMP_NOSPEC %ecx
 #endif
index c832291d948a6b4fd487a3e42fab8b8f7b9dd244..7cb8ba08beb997ef66724e2db5e66021c5ce32ee 100644 (file)
@@ -7,7 +7,7 @@
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
-
+#include <asm/nospec-branch.h>
 
        .code64
        .section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
         * ip and parent ip are used and the list function is called when
         * function tracing is enabled.
         */
-       call   *ftrace_trace_function
-
+       movq ftrace_trace_function, %r8
+       CALL_NOSPEC %r8
        restore_mcount_regs
 
        jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
        movq 8(%rsp), %rdx
        movq (%rsp), %rax
        addq $24, %rsp
-       jmp *%rdi
+       JMP_NOSPEC %rdi
 #endif
index a83b3346a0e104833793687aea68f7f216cf0374..c1bdbd3d3232cb83d07bfa4ce604ae0b620c796a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 
 #include <asm/apic.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 static void call_on_stack(void *func, void *stack)
 {
        asm volatile("xchgl     %%ebx,%%esp     \n"
-                    "call      *%%edi          \n"
+                    CALL_NOSPEC
                     "movl      %%ebx,%%esp     \n"
                     : "=b" (stack)
                     : "0" (stack),
-                      "D"(func)
+                      [thunk_target] "D"(func)
                     : "memory", "cc", "edx", "ecx", "eax");
 }
 
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
                call_on_stack(print_stack_overflow, isp);
 
        asm volatile("xchgl     %%ebx,%%esp     \n"
-                    "call      *%%edi          \n"
+                    CALL_NOSPEC
                     "movl      %%ebx,%%esp     \n"
                     : "=a" (arg1), "=b" (isp)
                     :  "0" (desc),   "1" (isp),
-                       "D" (desc->handle_irq)
+                       [thunk_target] "D" (desc->handle_irq)
                     : "memory", "cc", "ecx");
        return 1;
 }
index a4eb27918cebf99dc2415f7eec49c4838d6f9f41..a2486f4440734756d49a96313c0e2f9a174a87db 100644 (file)
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
                return -1;
        set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
        pte_unmap(pte);
+
+       /*
+        * PTI poisons low addresses in the kernel page tables in the
+        * name of making them unusable for userspace.  To execute
+        * code at such a low address, the poison must be cleared.
+        *
+        * Note: 'pgd' actually gets set in p4d_alloc() _or_
+        * pud_alloc() depending on 4/5-level paging.
+        */
+       pgd->pgd &= ~_PAGE_NX;
+
        return 0;
 }
 
index c4deb1f34faa6ce7ffe6bcaaebddc3e87b2a9a69..2b8eb4da4d0823bdcdf7bde1feb44132d0113cde 100644 (file)
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
        if (unlikely(!lapic_in_kernel(vcpu) ||
-                    kvm_event_needs_reinjection(vcpu)))
+                    kvm_event_needs_reinjection(vcpu) ||
+                    vcpu->arch.exception.pending))
                return false;
 
        if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
 
 int kvm_mmu_module_init(void)
 {
+       int ret = -ENOMEM;
+
        kvm_mmu_clear_all_pte_masks();
 
        pte_list_desc_cache = kmem_cache_create("pte_list_desc",
                                            sizeof(struct pte_list_desc),
                                            0, SLAB_ACCOUNT, NULL);
        if (!pte_list_desc_cache)
-               goto nomem;
+               goto out;
 
        mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
                                                  sizeof(struct kvm_mmu_page),
                                                  0, SLAB_ACCOUNT, NULL);
        if (!mmu_page_header_cache)
-               goto nomem;
+               goto out;
 
        if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
-               goto nomem;
+               goto out;
 
-       register_shrinker(&mmu_shrinker);
+       ret = register_shrinker(&mmu_shrinker);
+       if (ret)
+               goto out;
 
        return 0;
 
-nomem:
+out:
        mmu_destroy_caches();
-       return -ENOMEM;
+       return ret;
 }
 
 /*
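
The error-path rework above follows the kernel's usual single-exit shape:
seed ret with the catch-all failure code, let later calls overwrite it with
something more precise (here the register_shrinker() result), and funnel
every failure through one cleanup label. Reduced to a standalone sketch
with user-space stand-ins for the cache and shrinker calls:

    #include <stdlib.h>

    static int register_thing(void) { return -1; /* pretend failure */ }

    static int module_init_sketch(void)
    {
        int ret = -12;                 /* default: -ENOMEM */
        void *a, *b = NULL;

        a = malloc(16);
        if (!a)
            goto out;
        b = malloc(16);
        if (!b)
            goto out;

        ret = register_thing();        /* may fail with its own code */
        if (ret)
            goto out;

        return 0;                      /* success keeps a and b alive */
    out:
        free(b);                       /* free(NULL) is a no-op, like  */
        free(a);                       /* mmu_destroy_caches() on NULL */
        return ret;
    }
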
index bb31c801f1fc9d45c1dc629bd9c056769584530a..f40d0da1f1d321e4705f24ee85b80ad15233e438 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *c, *h;
        struct nested_state *g;
-       u32 h_intercept_exceptions;
 
        mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
        h = &svm->nested.hsave->control;
        g = &svm->nested;
 
-       /* No need to intercept #UD if L1 doesn't intercept it */
-       h_intercept_exceptions =
-               h->intercept_exceptions & ~(1U << UD_VECTOR);
-
        c->intercept_cr = h->intercept_cr | g->intercept_cr;
        c->intercept_dr = h->intercept_dr | g->intercept_dr;
-       c->intercept_exceptions =
-               h_intercept_exceptions | g->intercept_exceptions;
+       c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
        c->intercept = h->intercept | g->intercept;
 }
 
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
 {
        int er;
 
-       WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
        er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
        if (er == EMULATE_USER_EXIT)
                return 0;
@@ -5034,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
                );
 
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
 #ifdef CONFIG_X86_64
        wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
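
vmexit_fill_RSB() stuffs the CPU's 32-entry return stack buffer with benign
targets so that a later ret cannot consume an RSB entry the guest planted.
A heavily simplified sketch of the stuffing idea, as hypothetical x86-64
GNU C (the in-tree __FILL_RETURN_BUFFER macro interleaves and aligns the
calls, which this does not attempt):

    static void fill_rsb_sketch(void)
    {
        asm volatile(
            ".rept 32\n\t"
            "call 2f\n\t"          /* pushes one benign RSB entry   */
            "1: pause\n\t"         /* speculation trap: a predicted */
            "jmp 1b\n\t"           /* return spins harmlessly here  */
            "2:\n\t"
            ".endr\n\t"
            "addq $(32*8), %%rsp"  /* drop the 32 return addresses  */
            ::: "memory", "cc");
    }
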
index 5c14d65f676a99028b21108e272b9aa44bfe351c..c829d89e2e63f85bc36c6821b0ca1d7712297043 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
 {
        BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
 
-       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
-           vmcs_field_to_offset_table[field] == 0)
+       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+               return -ENOENT;
+
+       /*
+        * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+        * generic mechanism.
+        */
+       asm("lfence");
+
+       if (vmcs_field_to_offset_table[field] == 0)
                return -ENOENT;
 
        return vmcs_field_to_offset_table[field];
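
The lfence above applies the standard Spectre-v1 pattern: retire the bounds
check with a serializing instruction before the caller-influenced index is
used, so a mistrained branch cannot speculatively read past the table. The
pattern in isolation (hypothetical table and helper):

    #include <stddef.h>

    static int table[16];

    static int load_checked(size_t idx)
    {
        if (idx >= 16)
            return -1;
        /* Nothing below executes, even speculatively, until the
         * bounds check above has retired. */
        asm volatile("lfence" ::: "memory");
        return table[idx];
    }
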
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
        u32 eb;
 
-       eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+       eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
             (1u << DB_VECTOR) | (1u << AC_VECTOR);
        if ((vcpu->guest_debug &
             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
         */
        if (is_guest_mode(vcpu))
                eb |= get_vmcs12(vcpu)->exception_bitmap;
-       else
-               eb |= 1u << UD_VECTOR;
 
        vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                return 1;  /* already handled by vmx_vcpu_run() */
 
        if (is_invalid_opcode(intr_info)) {
-               WARN_ON_ONCE(is_guest_mode(vcpu));
                er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
                if (er == EMULATE_USER_EXIT)
                        return 0;
@@ -9485,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
              );
 
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (debugctlmsr)
                update_debugctlmsr(debugctlmsr);
index 7b181b61170e769fc41a062a3eddbb62c4e53793..f23934bbaf4ebe6a55331669160d6b0aa72ba5d4 100644 (file)
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
index 4d34bb548b41ebdddc20fcf464aad4cb44c6a763..46e71a74e6129b861c233ccf86f452b485e62d59 100644 (file)
@@ -29,7 +29,8 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/export.h>
-                               
+#include <asm/nospec-branch.h>
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
        negl %ebx
        lea 45f(%ebx,%ebx,2), %ebx
        testl %esi, %esi
-       jmp *%ebx
+       JMP_NOSPEC %ebx
 
        # Handle 2-byte-aligned regions
 20:    addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
        andl $-32,%edx
        lea 3f(%ebx,%ebx), %ebx
        testl %esi, %esi 
-       jmp *%ebx
+       JMP_NOSPEC %ebx
 1:     addl $64,%esi
        addl $64,%edi 
        SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644 (file)
index 0000000..cb45c6c
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+       .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+       CFI_STARTPROC
+       JMP_NOSPEC %\reg
+       CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
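
Each generated thunk is a retpoline: the indirect jump is replaced by a
call/ret pair whose speculative path is captured in a tiny spin loop, so
the indirect-branch predictor is never consulted. A hypothetical user-space
rendering of what JMP_NOSPEC %reg boils down to on x86-64 (reconstructed
from the pattern, not a copy of the asm/nospec-branch.h macro):

    typedef void (*fn_t)(void);

    static __attribute__((noinline)) void jmp_nospec(fn_t target)
    {
        asm volatile(
            "call 2f\n\t"              /* push &1f as return address    */
            "1: pause\n\t"             /* speculation lands here, spins */
            "jmp 1b\n\t"
            "2: mov %0, (%%rsp)\n\t"   /* retarget the return address   */
            "ret"                      /* architecturally jumps to fn   */
            : : "r" (target) : "memory");
        /* Not reached: as a frameless leaf, 'target' effectively
         * tail-returns to this function's caller. */
    }
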
index 43d4a4a29037ed9064668944e59f400490d5b658..ce38f165489b5a13d92091c8671879f30ce44e20 100644 (file)
@@ -149,7 +149,7 @@ pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
  *
  * Returns a pointer to a P4D on success, or NULL on failure.
  */
-static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
 {
        pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
@@ -164,12 +164,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
                if (!new_p4d_page)
                        return NULL;
 
-               if (pgd_none(*pgd)) {
-                       set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
-                       new_p4d_page = 0;
-               }
-               if (new_p4d_page)
-                       free_page(new_p4d_page);
+               set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
        }
        BUILD_BUG_ON(pgd_large(*pgd) != 0);
 
@@ -182,7 +177,7 @@ static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
  *
  * Returns a pointer to a PMD on success, or NULL on failure.
  */
-static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 {
        gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
        p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
@@ -194,12 +189,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
                if (!new_pud_page)
                        return NULL;
 
-               if (p4d_none(*p4d)) {
-                       set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
-                       new_pud_page = 0;
-               }
-               if (new_pud_page)
-                       free_page(new_pud_page);
+               set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
        }
 
        pud = pud_offset(p4d, address);
@@ -213,12 +203,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
                if (!new_pmd_page)
                        return NULL;
 
-               if (pud_none(*pud)) {
-                       set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
-                       new_pmd_page = 0;
-               }
-               if (new_pmd_page)
-                       free_page(new_pmd_page);
+               set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
        }
 
        return pmd_offset(pud, address);
@@ -251,12 +236,7 @@ static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
                if (!new_pte_page)
                        return NULL;
 
-               if (pmd_none(*pmd)) {
-                       set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
-                       new_pte_page = 0;
-               }
-               if (new_pte_page)
-                       free_page(new_pte_page);
+               set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
        }
 
        pte = pte_offset_kernel(pmd, address);
index 7a5350d08cef711a14c29cf1f8fcedb70ecc7465..563049c483a12c5fe587e463fb96bff4dde22c5a 100644 (file)
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
        } else if (!strcmp(str, "nocrs")) {
                pci_probe |= PCI_ROOT_NO_CRS;
                return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+       } else if (!strcmp(str, "big_root_window")) {
+               pci_probe |= PCI_BIG_ROOT_WINDOW;
+               return NULL;
+#endif
        } else if (!strcmp(str, "earlydump")) {
                pci_early_dump_regs = 1;
                return NULL;
index e663d6bf1328ebe2327c990f9d19557a49a2124a..f6a26e3cb4763325465df6ec19efb282e26e41fd 100644 (file)
@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
  */
 static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 {
-       unsigned i;
        u32 base, limit, high;
-       struct resource *res, *conflict;
        struct pci_dev *other;
+       struct resource *res;
+       unsigned i;
+       int r;
+
+       if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+               return;
 
        /* Check that we are the only device of that type */
        other = pci_get_device(dev->vendor, dev->device, NULL);
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
        if (!res)
                return;
 
+       /*
+        * Allocate a 256GB window directly below the 0xfd00000000 hardware
+        * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+        */
        res->name = "PCI Bus 0000:00";
        res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
                IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
-       res->start = 0x100000000ull;
+       res->start = 0xbd00000000ull;
        res->end = 0xfd00000000ull - 1;
 
-       /* Just grab the free area behind system memory for this */
-       while ((conflict = request_resource_conflict(&iomem_resource, res))) {
-               if (conflict->end >= res->end) {
-                       kfree(res);
-                       return;
-               }
-               res->start = conflict->end + 1;
+       r = request_resource(&iomem_resource, res);
+       if (r) {
+               kfree(res);
+               return;
        }
 
-       dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+       dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+                res);
+       add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 
        base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
                AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
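
The fixed placement follows from the comment's constraint: a 256GB window
that must end at the 0xfd00000000 hardware limit starts at 0xbd00000000.
A quick standalone check of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long end  = 0xfd00000000ULL; /* BKDG limit         */
        unsigned long long size = 256ULL << 30;    /* 0x4000000000 bytes */

        printf("window start = %#llx\n", end - size); /* 0xbd00000000 */
        return 0;
    }
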
index d87ac96e37ede3ea93dabb67d99538fdadc03de0..2dd15e967c3f5257c530d53c7d4f51f2e3165190 100644 (file)
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
                                pud[j] = *pud_offset(p4d_k, vaddr);
                        }
                }
+               pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
        }
+
 out:
        __flush_tlb_all();
 
index dc036e511f48d3adc3261ca4d4214c1c6244dbe6..5a0483e7bf662cb27044b7684567b644dfe49b81 100644 (file)
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
        return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
        .setup  = tng_bt_sfi_setup,
 };
 
index 4d62c071b166f65c848a12ca07bfe44ca20e198a..d85076223a696d0b00bee7c22a9e28b1e66dc975 100644 (file)
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
        struct {
                struct mmuext_op op;
-#ifdef CONFIG_SMP
-               DECLARE_BITMAP(mask, num_processors);
-#else
                DECLARE_BITMAP(mask, NR_CPUS);
-#endif
        } *args;
        struct multicall_space mcs;
+       const size_t mc_entry_size = sizeof(args->op) +
+               sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
        trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
        if (cpumask_empty(cpus))
                return;         /* nothing to do */
 
-       mcs = xen_mc_entry(sizeof(*args));
+       mcs = xen_mc_entry(mc_entry_size);
        args = mcs.args;
        args->op.arg2.vcpumask = to_cpumask(args->mask);
 
index 75011b80660f6262206b6759e7b63a06a1d809ea..3b34745d0a52dd097d9c2d8955bd4afce1d24ee1 100644 (file)
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void xen_save_time_memory_area(void);
 void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
index 60d7366ed343e9ad6a76b9dd36be22fbd7543c89..9a636f961572b99af844b1d8a28bebfd9463c9e2 100644 (file)
@@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 
                        spawn->alg = NULL;
                        spawns = &inst->alg.cra_users;
+
+                       /*
+                        * We may encounter an unregistered instance here, since
+                        * an instance's spawns are set up prior to the instance
+                        * being registered.  An unregistered instance will have
+                        * NULL ->cra_users.next, since ->cra_users isn't
+                        * properly initialized until registration.  But an
+                        * unregistered instance cannot have any users, so treat
+                        * it the same as ->cra_users being empty.
+                        */
+                       if (spawns->next == NULL)
+                               break;
                }
        } while ((spawns = crypto_more_spawns(alg, &stack, &top,
                                              &secondary_spawns)));
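
The observation carrying this fix: a zeroed, never-registered instance has
->cra_users.next == NULL, a state list_empty() cannot recognize because it
tests next == head. The distinction in isolation (hypothetical types):

    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    /* An initialized-but-empty list points at itself... */
    static int list_empty_sketch(const struct list_head *h)
    {
        return h->next == h;
    }

    /* ...while a merely zeroed head still has a NULL next, which is
     * what the added break in crypto_remove_spawns() keys off. */
    static int list_uninitialized(const struct list_head *h)
    {
        return h->next == NULL;
    }
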
index bdc87907d6a1b297276e9c4acbb89175f69d0d08..2415ad9f6dd4f14fdf7132387e9c9fb2c4b1df26 100644 (file)
@@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
 config GENERIC_CPU_AUTOPROBE
        bool
 
+config GENERIC_CPU_VULNERABILITIES
+       bool
+
 config SOC_BUS
        bool
        select GLOB
index 58a9b608d8216a67f13d6c514d92290dc0a27de3..d99038487a0d2fab5164f8df3f1e0f7e58ea8ea3 100644 (file)
@@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void)
 #endif
 }
 
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+       &dev_attr_spectre_v1.attr,
+       &dev_attr_spectre_v2.attr,
+       NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+       .name  = "vulnerabilities",
+       .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+       if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+                              &cpu_root_vulnerabilities_group))
+               pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
 void __init cpu_dev_init(void)
 {
        if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
                panic("Failed to register CPU subsystem");
 
        cpu_dev_register_generic();
+       cpu_register_vulnerabilities();
 }
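
The cpu_show_*() defaults rely on weak linkage: the driver core ships
"Not affected" stubs, and an architecture that knows better (x86 in this
series) provides strong definitions that win at link time. The mechanism
in miniature, with a hypothetical symbol name:

    #include <stdio.h>

    /* Weak default in one translation unit... */
    __attribute__((weak)) int show_demo_vuln(char *buf)
    {
        return sprintf(buf, "Not affected\n");
    }

    /* ...is replaced wherever a strong definition exists, e.g.:
     *
     *     int show_demo_vuln(char *buf)
     *     {
     *         return sprintf(buf, "Mitigation: enabled\n");
     *     }
     */
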
index 85d4c57870fb7a2c577803d12e3c0bf219cf056f..49af94627c8a1e1f3446dad2ebb625075112ba07 100644 (file)
@@ -2777,12 +2777,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-               unsigned int opcode, int rings)
+               unsigned int opcode, unsigned long rings)
 {
        struct cmd_info *info = NULL;
        unsigned int ring;
 
-       for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+       for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
                info = find_cmd_entry(gvt, opcode, ring);
                if (info)
                        break;
index 8e331142badbcbad4ceebbd0c0b2e2fa2fb8584c..64d67ff9bf084a3c02e3c2f808ed9af066208f97 100644 (file)
@@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
                        return ret;
        } else {
                if (!test_bit(index, spt->post_shadow_bitmap)) {
+                       int type = spt->shadow_page.type;
+
                        ppgtt_get_shadow_entry(spt, &se, index);
                        ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
                        if (ret)
                                return ret;
+                       ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+                       ppgtt_set_shadow_entry(spt, &se, index);
                }
-
                ppgtt_set_post_shadow(spt, index);
        }
 
index 18de6569d04aef46aad4af88edb20f0a94d91b53..5cfba89ed586391409cf02459344b668c9c181bc 100644 (file)
@@ -467,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
        struct drm_i915_gem_request *rq;
        struct intel_engine_cs *engine;
 
-       if (!dma_fence_is_i915(fence))
+       if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
                return;
 
        rq = to_request(fence);
index 333f40bc03bb052adddb75c29e48305a51e421e1..7923dfd9963c6e40b0e7e84d8a9d9bf0cca87a1f 100644 (file)
@@ -7027,6 +7027,8 @@ enum {
 #define GEN9_SLICE_COMMON_ECO_CHICKEN0         _MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING            (1<<14)
 
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1         _MMIO(0x731c)
+
 #define GEN7_L3SQCREG1                         _MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE             0x00D30000
 
index ab5bf4e2e28e21f3daa7072a37cb6895aba8233c..6074e04dc99fca3d269d78ac2aa4e9159e78aedb 100644 (file)
@@ -1390,6 +1390,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
+       /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+       ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+       if (ret)
+               return ret;
+
        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
index d36e2560743545cef7b32b24adb794e719ca5da2..e71a8cd50498c338aa0e0f3c03c2b62e0e8823b8 100644 (file)
@@ -974,6 +974,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
+       if (i915_gem_request_completed(request))
+               return;
+
        if (prio <= READ_ONCE(request->priotree.priority))
                return;
 
index a2978a37b4f3c72ef4e6f03b1fb7f2c0c7500478..700fc754f28a4c32a001c243822b5e97733ba0a7 100644 (file)
@@ -174,6 +174,7 @@ gf119_sor = {
                .links = gf119_sor_dp_links,
                .power = g94_sor_dp_power,
                .pattern = gf119_sor_dp_pattern,
+               .drive = gf119_sor_dp_drive,
                .vcpi = gf119_sor_dp_vcpi,
                .audio = gf119_sor_dp_audio,
                .audio_sym = gf119_sor_dp_audio_sym,
index b0a1dedac8026e0ad89ee1227cb7b7881d3fee7d..476079f1255f6c51e5cba11ff6f05f1c315a691e 100644 (file)
@@ -2656,6 +2656,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
                                name, err);
                        goto remove;
                }
+       } else {
+               /* fall back to the module clock on SOR0 (eDP/LVDS only) */
+               sor->clk_out = sor->clk;
        }
 
        sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
index 26eddbb628936b91f20a000c405bfbc536324e89..3dd62d75f5319dbffcd9d27ea6f60927d193eb92 100644 (file)
@@ -209,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-       /* Undo the effects of a previous vc4_irq_uninstall. */
-       enable_irq(dev->irq);
-
        /* Enable both the render done and out of memory interrupts. */
        V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 
index 622cd43840b8c53588396b6d1d82b5d234a69113..493f392b3a0a90e70820d3582c0847ebe3cc0bd7 100644 (file)
@@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev)
                return ret;
 
        vc4_v3d_init_hw(vc4->dev);
+
+       /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+       enable_irq(vc4->dev->irq);
        vc4_irq_postinstall(vc4->dev);
 
        return 0;
index 21c62a34e5580af7e56505a64bbae48707ae2599..87e8af5776a389166278e24c6087a49f03682dbb 100644 (file)
@@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
        }
 
        view_type = vmw_view_cmd_to_type(header->id);
+       if (view_type == vmw_view_max)
+               return -EINVAL;
        cmd = container_of(header, typeof(*cmd), header);
        ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
                                user_surface_converter,
index 0545740b3724f1d6c28b9e1dd169351fdd723949..641294aef1652e1d8599016090aacd4bb33364b0 100644 (file)
@@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
        vps->pinned = 0;
 
        /* Mapping is managed by prepare_fb/cleanup_fb */
-       memset(&vps->guest_map, 0, sizeof(vps->guest_map));
        memset(&vps->host_map, 0, sizeof(vps->host_map));
        vps->cpp = 0;
 
@@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
        /* Should have been freed by cleanup_fb */
-       if (vps->guest_map.virtual) {
-               DRM_ERROR("Guest mapping not freed\n");
-               ttm_bo_kunmap(&vps->guest_map);
-       }
-
        if (vps->host_map.virtual) {
                DRM_ERROR("Host mapping not freed\n");
                ttm_bo_kunmap(&vps->host_map);
index ff9c8389ff21c3b2923c8387455976d5425e33f3..cd9da2dd79af1a062d4aaa3d6b6ded468bd4207e 100644 (file)
@@ -175,7 +175,7 @@ struct vmw_plane_state {
        int pinned;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map, guest_map;
+       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
index 90b5437fd787e0de7d360b5b697fb33d4a8551c3..b68d74888ab1100be82f8a2a9fdc3234a4e04293 100644 (file)
@@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
        bool defined;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map, guest_map;
+       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
@@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        s32 src_pitch, dst_pitch;
        u8 *src, *dst;
        bool not_used;
-
+       struct ttm_bo_kmap_obj guest_map;
+       int ret;
 
        if (!dirty->num_hits)
                return;
@@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        if (width == 0 || height == 0)
                return;
 
+       ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
+                         &guest_map);
+       if (ret) {
+               DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
+                         ret);
+               goto out_cleanup;
+       }
 
        /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
        src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
@@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
        dst_pitch = ddirty->pitch;
-       dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
+       dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
        dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
@@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
                vmw_fifo_commit(dev_priv, sizeof(*cmd));
        }
 
+       ttm_bo_kunmap(&guest_map);
 out_cleanup:
        ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
        ddirty->right = ddirty->bottom = S32_MIN;
@@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-       if (vps->guest_map.virtual)
-               ttm_bo_kunmap(&vps->guest_map);
-
        if (vps->host_map.virtual)
                ttm_bo_kunmap(&vps->host_map);
 
@@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
         */
        if (vps->content_fb_type == SEPARATE_DMA &&
            !(dev_priv->capabilities & SVGA_CAP_3D)) {
-
-               struct vmw_framebuffer_dmabuf *new_vfbd;
-
-               new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
-
-               ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
-                                    NULL);
-               if (ret)
-                       goto out_srf_unpin;
-
-               ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
-                                 new_vfbd->buffer->base.num_pages,
-                                 &vps->guest_map);
-
-               ttm_bo_unreserve(&new_vfbd->buffer->base);
-
-               if (ret) {
-                       DRM_ERROR("Failed to map content buffer to CPU\n");
-                       goto out_srf_unpin;
-               }
-
                ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
                                  vps->surf->res.backup->base.num_pages,
                                  &vps->host_map);
                if (ret) {
                        DRM_ERROR("Failed to map display buffer to CPU\n");
-                       ttm_bo_kunmap(&vps->guest_map);
                        goto out_srf_unpin;
                }
 
@@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
        stdu->display_srf = vps->surf;
        stdu->content_fb_type = vps->content_fb_type;
        stdu->cpp = vps->cpp;
-       memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
        memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
        if (!stdu->defined)
index 7750a9c38b066777982c8309d8436b5780235d36..1df7da47f431743306fd9e2283cbba3090aac9c4 100644 (file)
@@ -763,11 +763,11 @@ static int complete_subctxt(struct hfi1_filedata *fd)
        }
 
        if (ret) {
-               hfi1_rcd_put(fd->uctxt);
-               fd->uctxt = NULL;
                spin_lock_irqsave(&fd->dd->uctxt_lock, flags);
                __clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
                spin_unlock_irqrestore(&fd->dd->uctxt_lock, flags);
+               hfi1_rcd_put(fd->uctxt);
+               fd->uctxt = NULL;
        }
 
        return ret;
index 31ad28853efa9a851580728691cf2f2bbf4e1d75..cffe5966aef97a1c8b4be352b9aa1b103b5426ed 100644 (file)
@@ -4362,12 +4362,11 @@ static void to_rdma_ah_attr(struct mlx5_ib_dev *ibdev,
 
        memset(ah_attr, 0, sizeof(*ah_attr));
 
-       ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
-       rdma_ah_set_port_num(ah_attr, path->port);
-       if (rdma_ah_get_port_num(ah_attr) == 0 ||
-           rdma_ah_get_port_num(ah_attr) > MLX5_CAP_GEN(dev, num_ports))
+       if (!path->port || path->port > MLX5_CAP_GEN(dev, num_ports))
                return;
 
+       ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, path->port);
+
        rdma_ah_set_port_num(ah_attr, path->port);
        rdma_ah_set_sl(ah_attr, path->dci_cfi_prio_sl & 0xf);
 
index 720dfb3a1ac271bd16950563006664e83cab6be8..1b02283ce20eff38897f2dcf729924ff6998ff52 100644 (file)
@@ -741,6 +741,7 @@ isert_connect_error(struct rdma_cm_id *cma_id)
 {
        struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
+       ib_drain_qp(isert_conn->qp);
        list_del_init(&isert_conn->node);
        isert_conn->cm_id = NULL;
        isert_put_conn(isert_conn);
index fcf7235d5742ae443bdc629d293983db5b60312d..157e1d9e7725a050f64c32ebed135b46ae0ee58d 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
@@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
        return 0;
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
+
+MODULE_LICENSE("GPL v2");
index f7f157a62a4a7dfa7155b5fe9933ead9f870ad1b..555c7f133eb8a5246ff3534d1899927463e99dc2 100644 (file)
@@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = {
 struct s3cmci_reg {
        unsigned short  addr;
        unsigned char   *name;
-} debug_regs[] = {
+};
+
+static const struct s3cmci_reg debug_regs[] = {
        DBG_REG(CON),
        DBG_REG(PRE),
        DBG_REG(CMDARG),
@@ -1446,7 +1448,7 @@ struct s3cmci_reg {
 static int s3cmci_regs_show(struct seq_file *seq, void *v)
 {
        struct s3cmci_host *host = seq->private;
-       struct s3cmci_reg *rptr = debug_regs;
+       const struct s3cmci_reg *rptr = debug_regs;
 
        for (; rptr->name; rptr++)
                seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,
index 2260063b0ea83be7f24e5d780af8bd808c23b427..6e5cf9d9cd9927f4264e3a01394e2b84d80d7d3b 100644 (file)
@@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
        return dev->of_node == data;
 }
 
+/* Note this function returns a reference to the mux_chip dev. */
 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 {
        struct device *dev;
@@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
            (!args.args_count && (mux_chip->controllers > 1))) {
                dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
                        np, args.np);
+               put_device(&mux_chip->dev);
                return ERR_PTR(-EINVAL);
        }
 
@@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
        if (controller >= mux_chip->controllers) {
                dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
                        np, controller, args.np);
+               put_device(&mux_chip->dev);
                return ERR_PTR(-EINVAL);
        }
 
-       get_device(&mux_chip->dev);
        return &mux_chip->mux[controller];
 }
 EXPORT_SYMBOL_GPL(mux_control_get);
index 7ccdc3e30c98c25d238d0a124ffb854675b1732c..53d6bb045e9e91193452320b9e6cc5226849c1da 100644 (file)
@@ -184,7 +184,7 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
        void *cmd_head = pcan_usb_fd_cmd_buffer(dev);
        int err = 0;
        u8 *packet_ptr;
-       int i, n = 1, packet_len;
+       int packet_len;
        ptrdiff_t cmd_len;
 
        /* usb device unregistered? */
@@ -201,17 +201,13 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
        }
 
        packet_ptr = cmd_head;
+       packet_len = cmd_len;
 
        /* firmware is not able to re-assemble 512 bytes buffer in full-speed */
-       if ((dev->udev->speed != USB_SPEED_HIGH) &&
-           (cmd_len > PCAN_UFD_LOSPD_PKT_SIZE)) {
-               packet_len = PCAN_UFD_LOSPD_PKT_SIZE;
-               n += cmd_len / packet_len;
-       } else {
-               packet_len = cmd_len;
-       }
+       if (unlikely(dev->udev->speed != USB_SPEED_HIGH))
+               packet_len = min(packet_len, PCAN_UFD_LOSPD_PKT_SIZE);
 
-       for (i = 0; i < n; i++) {
+       do {
                err = usb_bulk_msg(dev->udev,
                                   usb_sndbulkpipe(dev->udev,
                                                   PCAN_USBPRO_EP_CMDOUT),
@@ -224,7 +220,12 @@ static int pcan_usb_fd_send_cmd(struct peak_usb_device *dev, void *cmd_tail)
                }
 
                packet_ptr += packet_len;
-       }
+               cmd_len -= packet_len;
+
+               if (cmd_len < PCAN_UFD_LOSPD_PKT_SIZE)
+                       packet_len = cmd_len;
+
+       } while (packet_len > 0);
 
        return err;
 }
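
The rewritten loop drops the precomputed iteration count and walks the
buffer directly: full-size packets while enough data remains, then exactly
one short tail. The same control flow as a standalone sketch with a
hypothetical packet size:

    #include <stddef.h>
    #include <stdio.h>

    #define PKT_SIZE 64   /* stand-in for PCAN_UFD_LOSPD_PKT_SIZE */

    static void send_chunked(const char *buf, ptrdiff_t len, int full_speed)
    {
        ptrdiff_t packet_len = len;

        if (full_speed && packet_len > PKT_SIZE)
            packet_len = PKT_SIZE;

        do {
            printf("bulk msg: %td bytes at %p\n", packet_len, (void *)buf);
            buf += packet_len;
            len -= packet_len;
            if (len < PKT_SIZE)
                packet_len = len;     /* final short packet, or 0 */
        } while (packet_len > 0);
    }
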
index 410a0a95130b4ed1b292dcc628ae820c164146da..b3e7fafee3dfb97375b9dc460a57fa10ed789280 100644 (file)
@@ -1913,3 +1913,7 @@ static struct platform_driver cs89x0_driver = {
 module_platform_driver_probe(cs89x0_driver, cs89x0_platform_probe);
 
 #endif /* CONFIG_CS89x0_PLATFORM */
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Crystal Semiconductor (Now Cirrus Logic) CS89[02]0 network driver");
+MODULE_AUTHOR("Russell Nelson <nelson@crynwr.com>");
index 7892f2f0c6b56473ed86b39d6525535f61f7b741..2c2976a2dda6b9f1055af98c19cbc9d17bf8279b 100644 (file)
@@ -613,9 +613,11 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
-static void fs_timeout(struct net_device *dev)
+static void fs_timeout_work(struct work_struct *work)
 {
-       struct fs_enet_private *fep = netdev_priv(dev);
+       struct fs_enet_private *fep = container_of(work, struct fs_enet_private,
+                                                  timeout_work);
+       struct net_device *dev = fep->ndev;
        unsigned long flags;
        int wake = 0;
 
@@ -627,7 +629,6 @@ static void fs_timeout(struct net_device *dev)
                phy_stop(dev->phydev);
                (*fep->ops->stop)(dev);
                (*fep->ops->restart)(dev);
-               phy_start(dev->phydev);
        }
 
        phy_start(dev->phydev);
@@ -639,6 +640,13 @@ static void fs_timeout(struct net_device *dev)
                netif_wake_queue(dev);
 }
 
+static void fs_timeout(struct net_device *dev)
+{
+       struct fs_enet_private *fep = netdev_priv(dev);
+
+       schedule_work(&fep->timeout_work);
+}
+
 /*-----------------------------------------------------------------------------
  *  generic link-change handler - should be sufficient for most cases
  *-----------------------------------------------------------------------------*/
@@ -759,6 +767,7 @@ static int fs_enet_close(struct net_device *dev)
        netif_stop_queue(dev);
        netif_carrier_off(dev);
        napi_disable(&fep->napi);
+       cancel_work_sync(&fep->timeout_work);
        phy_stop(dev->phydev);
 
        spin_lock_irqsave(&fep->lock, flags);
@@ -1019,6 +1028,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 
        ndev->netdev_ops = &fs_enet_netdev_ops;
        ndev->watchdog_timeo = 2 * HZ;
+       INIT_WORK(&fep->timeout_work, fs_timeout_work);
        netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight);
 
        ndev->ethtool_ops = &fs_ethtool_ops;
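
The timeout path moves to a workqueue because ndo_tx_timeout runs in the
watchdog's atomic context while phy_stop() and the restart sequence may
sleep; the handler itself now only schedules the work. Inside the worker,
the private state is recovered with the standard container_of step,
sketched here with hypothetical types:

    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work_struct { int pending; };

    struct fep_sketch {
        int rx_ring;
        struct work_struct timeout_work;
    };

    static void timeout_work_fn(struct work_struct *work)
    {
        struct fep_sketch *fep =
            container_of(work, struct fep_sketch, timeout_work);

        (void)fep;   /* may sleep here, unlike the watchdog context */
    }
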
index 92e06b37a199bb9aa7bdb8a23c298296aa5bd217..195fae6aec4ae71bbdcd77114c1e86148c43c061 100644 (file)
@@ -125,6 +125,7 @@ struct fs_enet_private {
        spinlock_t lock;        /* during all ops except TX pckt processing */
        spinlock_t tx_lock;     /* during fs_start_xmit and fs_tx         */
        struct fs_platform_info *fpi;
+       struct work_struct timeout_work;
        const struct fs_ops *ops;
        int rx_ring, tx_ring;
        dma_addr_t ring_mem_addr;
index 1dc4aef37d3a4faf3592ab0dede92b802a831824..4b3df17c7a457da545cf8b0e1d5c13f70e41a855 100644 (file)
@@ -756,6 +756,12 @@ static int ibmvnic_login(struct net_device *netdev)
                }
        } while (adapter->renegotiate);
 
+       /* handle pending MAC address changes after successful login */
+       if (adapter->mac_change_pending) {
+               __ibmvnic_set_mac(netdev, &adapter->desired.mac);
+               adapter->mac_change_pending = false;
+       }
+
        return 0;
 }
 
@@ -993,11 +999,6 @@ static int ibmvnic_open(struct net_device *netdev)
 
        mutex_lock(&adapter->reset_lock);
 
-       if (adapter->mac_change_pending) {
-               __ibmvnic_set_mac(netdev, &adapter->desired.mac);
-               adapter->mac_change_pending = false;
-       }
-
        if (adapter->state != VNIC_CLOSED) {
                rc = ibmvnic_login(netdev);
                if (rc) {
@@ -1527,7 +1528,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        struct sockaddr *addr = p;
 
-       if (adapter->state != VNIC_OPEN) {
+       if (adapter->state == VNIC_PROBED) {
                memcpy(&adapter->desired.mac, addr, sizeof(struct sockaddr));
                adapter->mac_change_pending = true;
                return 0;
index 2801ecd09eab098e66ef28aa89fcd5c5f7e7430d..6c02b2d6ba06625e8f11833c1f3a23f07895766a 100644 (file)
@@ -333,7 +333,7 @@ nfp_net_get_link_ksettings(struct net_device *netdev,
            ls >= ARRAY_SIZE(ls_to_ethtool))
                return 0;
 
-       cmd->base.speed = ls_to_ethtool[sts];
+       cmd->base.speed = ls_to_ethtool[ls];
        cmd->base.duplex = DUPLEX_FULL;
 
        return 0;
index be48d9abd0010ed88cb6c6161cbf4e7b81429710..3588081b2e274f2308eb795ca06632b9a5a442e8 100644 (file)
@@ -776,6 +776,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
        int rc = 0;
        struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL;
        bool b_ret_ent = true;
+       bool eblock;
 
        if (!p_hwfn)
                return -EINVAL;
@@ -794,6 +795,11 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
        if (rc)
                goto spq_post_fail;
 
+       /* Check if entry is in block mode before qed_spq_add_entry,
+        * which might kfree p_ent.
+        */
+       eblock = (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK);
+
        /* Add the request to the pending queue */
        rc = qed_spq_add_entry(p_hwfn, p_ent, p_ent->priority);
        if (rc)
@@ -811,7 +817,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 
        spin_unlock_bh(&p_spq->lock);
 
-       if (p_ent->comp_mode == QED_SPQ_MODE_EBLOCK) {
+       if (eblock) {
                /* For entries in QED BLOCK mode, the completion code cannot
                 * perform the necessary cleanup - if it did, we couldn't
                 * access p_ent here to see whether it's successful or not.
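
The bug class fixed here is reading a field of an object after handing its
ownership to code that may free it; the fix snapshots comp_mode first. The
shape, reduced to a sketch with hypothetical stand-ins:

    #include <stdlib.h>

    struct entry { int comp_mode; };

    static void add_to_queue(struct entry *e)
    {
        free(e);    /* stand-in: completion may kfree() the entry */
    }

    static int post_sketch(struct entry *e)
    {
        int eblock = (e->comp_mode == 1);   /* snapshot before handoff */

        add_to_queue(e);                    /* e may be gone from here */
        return eblock;                      /* safe: no use of e       */
    }
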
index b9e2846589f8678e72683c230d601d7f517de5da..53924a4fc31c75382e0a2d910b09d389a066ec2a 100644 (file)
@@ -2089,8 +2089,8 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf)
                add_reg(CSMR);
        if (cd->select_mii)
                add_reg(RMII_MII);
-       add_reg(ARSTR);
        if (cd->tsu) {
+               add_tsu_reg(ARSTR);
                add_tsu_reg(TSU_CTRST);
                add_tsu_reg(TSU_FWEN0);
                add_tsu_reg(TSU_FWEN1);
index ed58c746e4af194a9edc8edb3d5c15bd3e2e0acd..f5a7eb22d0f50e38a39c94f1dbb01cd17dff73c7 100644 (file)
@@ -715,7 +715,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
                /* warning!!!! We are retrieving the virtual ptr in the sw_data
                 * field as a 32bit value. Will not work on 64bit machines
                 */
-               page = (struct page *)GET_SW_DATA0(desc);
+               page = (struct page *)GET_SW_DATA0(ndesc);
 
                if (likely(dma_buff && buf_len && page)) {
                        dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE,
index d8e5747ff4e32e9bf0c221e88345fd27c60b0e5f..264d4af0bf69278f5ee50e804c9ccf7225fb5782 100644 (file)
@@ -1006,17 +1006,18 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set)
        if (!ifname_is_set)
                snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index);
 
+       mutex_unlock(&pn->all_ppp_mutex);
+
        ret = register_netdevice(ppp->dev);
        if (ret < 0)
                goto err_unit;
 
        atomic_inc(&ppp_unit_count);
 
-       mutex_unlock(&pn->all_ppp_mutex);
-
        return 0;
 
 err_unit:
+       mutex_lock(&pn->all_ppp_mutex);
        unit_put(&pn->units_idr, ppp->file.index);
 err:
        mutex_unlock(&pn->all_ppp_mutex);
index 4f4a842a1c9cb8ac3397b329854a0fc7bd2f6aa3..a8ec589d1359a86614e9ed829b91fd145494cd00 100644 (file)
@@ -611,6 +611,14 @@ static void tun_queue_purge(struct tun_file *tfile)
        skb_queue_purge(&tfile->sk.sk_error_queue);
 }
 
+static void tun_cleanup_tx_array(struct tun_file *tfile)
+{
+       if (tfile->tx_array.ring.queue) {
+               skb_array_cleanup(&tfile->tx_array);
+               memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
+       }
+}
+
 static void __tun_detach(struct tun_file *tfile, bool clean)
 {
        struct tun_file *ntfile;
@@ -657,8 +665,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
                            tun->dev->reg_state == NETREG_REGISTERED)
                                unregister_netdevice(tun->dev);
                }
-               if (tun)
-                       skb_array_cleanup(&tfile->tx_array);
+               tun_cleanup_tx_array(tfile);
                sock_put(&tfile->sk);
        }
 }
@@ -700,11 +707,13 @@ static void tun_detach_all(struct net_device *dev)
                /* Drop read queue */
                tun_queue_purge(tfile);
                sock_put(&tfile->sk);
+               tun_cleanup_tx_array(tfile);
        }
        list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
                tun_enable_queue(tfile);
                tun_queue_purge(tfile);
                sock_put(&tfile->sk);
+               tun_cleanup_tx_array(tfile);
        }
        BUG_ON(tun->numdisabled != 0);
 
@@ -2851,6 +2860,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 
        sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
 
+       memset(&tfile->tx_array, 0, sizeof(tfile->tx_array));
+
        return 0;
 }
 
index 94c7804903c48eba2ed0496d7d4461e9c38b8c3c..ec56ff29aac4ed9f28e3f984093968da53147ac5 100644 (file)
@@ -2396,6 +2396,7 @@ static int lan78xx_reset(struct lan78xx_net *dev)
                buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE;
                dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE;
                dev->rx_qlen = 4;
+               dev->tx_qlen = 4;
        }
 
        ret = lan78xx_write_reg(dev, BURST_CAP, buf);
index d51d9abf7986b203350167d6a2fcdcfcddf8b972..0657203ffb9174bac37085613100726f30928a1a 100644 (file)
@@ -606,6 +606,7 @@ enum rtl8152_flags {
        PHY_RESET,
        SCHEDULE_NAPI,
        GREEN_ETHERNET,
+       DELL_TB_RX_AGG_BUG,
 };
 
 /* Define these values to match your device */
@@ -1798,6 +1799,9 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
                dev_kfree_skb_any(skb);
 
                remain = agg_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
+
+               if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+                       break;
        }
 
        if (!skb_queue_empty(&skb_head)) {
@@ -4133,6 +4137,9 @@ static void r8153_init(struct r8152 *tp)
        /* rx aggregation */
        ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_USB_CTRL);
        ocp_data &= ~(RX_AGG_DISABLE | RX_ZERO_EN);
+       if (test_bit(DELL_TB_RX_AGG_BUG, &tp->flags))
+               ocp_data |= RX_AGG_DISABLE;
+
        ocp_write_word(tp, MCU_TYPE_USB, USB_USB_CTRL, ocp_data);
 
        rtl_tally_reset(tp);
@@ -5207,6 +5214,12 @@ static int rtl8152_probe(struct usb_interface *intf,
                netdev->hw_features &= ~NETIF_F_RXCSUM;
        }
 
+       if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 &&
+           udev->serial && !strcmp(udev->serial, "000001000000")) {
+           dev_info(&udev->dev, "Dell TB16 Dock, disable RX aggregation\n");
+               set_bit(DELL_TB_RX_AGG_BUG, &tp->flags);
+       }
+
        netdev->ethtool_ops = &ops;
        netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
index e8189c07b41f6b450f135ef703ec4e01568e311d..f6d4a50f1bdb8a441a6d20a2d1fe7d4f25ce6636 100644 (file)
@@ -489,6 +489,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = {
 
 static spinlock_t hwsim_radio_lock;
 static LIST_HEAD(hwsim_radios);
+static struct workqueue_struct *hwsim_wq;
 static int hwsim_radio_idx;
 
 static struct platform_driver mac80211_hwsim_driver = {
@@ -3120,6 +3121,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
        if (info->attrs[HWSIM_ATTR_CHANNELS])
                param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]);
 
+       if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) {
+               GENL_SET_ERR_MSG(info, "too many channels specified");
+               return -EINVAL;
+       }
+
        if (info->attrs[HWSIM_ATTR_NO_VIF])
                param.no_vif = true;
 
@@ -3342,7 +3348,7 @@ static void remove_user_radios(u32 portid)
                if (entry->destroy_on_close && entry->portid == portid) {
                        list_del(&entry->list);
                        INIT_WORK(&entry->destroy_work, destroy_radio);
-                       schedule_work(&entry->destroy_work);
+                       queue_work(hwsim_wq, &entry->destroy_work);
                }
        }
        spin_unlock_bh(&hwsim_radio_lock);
@@ -3417,7 +3423,7 @@ static void __net_exit hwsim_exit_net(struct net *net)
 
                list_del(&data->list);
                INIT_WORK(&data->destroy_work, destroy_radio);
-               schedule_work(&data->destroy_work);
+               queue_work(hwsim_wq, &data->destroy_work);
        }
        spin_unlock_bh(&hwsim_radio_lock);
 }
@@ -3449,6 +3455,10 @@ static int __init init_mac80211_hwsim(void)
 
        spin_lock_init(&hwsim_radio_lock);
 
+       hwsim_wq = alloc_workqueue("hwsim_wq", WQ_MEM_RECLAIM, 0);
+       if (!hwsim_wq)
+               return -ENOMEM;
+
        err = register_pernet_device(&hwsim_net_ops);
        if (err)
                return err;
@@ -3587,8 +3597,11 @@ static void __exit exit_mac80211_hwsim(void)
        hwsim_exit_netlink();
 
        mac80211_hwsim_free();
+       flush_workqueue(hwsim_wq);
+
        unregister_netdev(hwsim_mon);
        platform_driver_unregister(&mac80211_hwsim_driver);
        unregister_pernet_device(&hwsim_net_ops);
+       destroy_workqueue(hwsim_wq);
 }
 module_exit(exit_mac80211_hwsim);
index 76b4fe6816a03533dbb6bbfbf30016110a26c648..894c2ccb3891e0b83e1c839f0b08f4cba5d179ae 100644 (file)
@@ -74,6 +74,7 @@ static struct nvmf_host *nvmf_host_default(void)
                return NULL;
 
        kref_init(&host->ref);
+       uuid_gen(&host->id);
        snprintf(host->nqn, NVMF_NQN_SIZE,
                "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
 
index 0f695df14c9d8f0a5c0a95ea8c28dc075c2aa36c..372ce9913e6dea373d4cdd06ca51f2419c7a7373 100644 (file)
@@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                break;
        case ASHMEM_SET_SIZE:
                ret = -EINVAL;
+               mutex_lock(&ashmem_mutex);
                if (!asma->file) {
                        ret = 0;
                        asma->size = (size_t)arg;
                }
+               mutex_unlock(&ashmem_mutex);
                break;
        case ASHMEM_GET_SIZE:
                ret = asma->size;
index 93eff7dec2f5e9d7d9449b1189f8e5d2c91728e6..1b3efb14aec7878e616a3a1a7588046e2641aec7 100644 (file)
@@ -1147,11 +1147,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        udc = kzalloc(sizeof(*udc), GFP_KERNEL);
        if (!udc)
-               goto err1;
-
-       ret = device_add(&gadget->dev);
-       if (ret)
-               goto err2;
+               goto err_put_gadget;
 
        device_initialize(&udc->dev);
        udc->dev.release = usb_udc_release;
@@ -1160,7 +1156,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
        udc->dev.parent = parent;
        ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
        if (ret)
-               goto err3;
+               goto err_put_udc;
+
+       ret = device_add(&gadget->dev);
+       if (ret)
+               goto err_put_udc;
 
        udc->gadget = gadget;
        gadget->udc = udc;
@@ -1170,7 +1170,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        ret = device_add(&udc->dev);
        if (ret)
-               goto err4;
+               goto err_unlist_udc;
 
        usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
        udc->vbus = true;
@@ -1178,27 +1178,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
        /* pick up one of pending gadget drivers */
        ret = check_pending_gadget_drivers(udc);
        if (ret)
-               goto err5;
+               goto err_del_udc;
 
        mutex_unlock(&udc_lock);
 
        return 0;
 
-err5:
+ err_del_udc:
        device_del(&udc->dev);
 
-err4:
+ err_unlist_udc:
        list_del(&udc->list);
        mutex_unlock(&udc_lock);
 
-err3:
-       put_device(&udc->dev);
        device_del(&gadget->dev);
 
-err2:
-       kfree(udc);
+ err_put_udc:
+       put_device(&udc->dev);
 
-err1:
+ err_put_gadget:
        put_device(&gadget->dev);
        return ret;
 }
index 465dbf68b4633d438e858260d5db12d6bc0ce44e..f723f7b8c9ac4cdb65e5beb9a401ac3ce64f99e2 100644 (file)
@@ -279,6 +279,8 @@ static int usb3503_probe(struct usb3503 *hub)
        if (gpio_is_valid(hub->gpio_reset)) {
                err = devm_gpio_request_one(dev, hub->gpio_reset,
                                GPIOF_OUT_INIT_LOW, "usb3503 reset");
+               /* Datasheet defines a hardware reset to be at least 100us */
+               usleep_range(100, 10000);
                if (err) {
                        dev_err(dev,
                                "unable to request GPIO %d as reset pin (%d)\n",
index f6ae753ab99b0998fcc99463fd50971f658dfaf0..f932f40302df942b744353f0db738edba88c553f 100644 (file)
@@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
                break;
 
        case MON_IOCQ_RING_SIZE:
+               mutex_lock(&rp->fetch_lock);
                ret = rp->b_size;
+               mutex_unlock(&rp->fetch_lock);
                break;
 
        case MON_IOCT_RING_SIZE:
@@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
        unsigned long offset, chunk_idx;
        struct page *pageptr;
 
+       mutex_lock(&rp->fetch_lock);
        offset = vmf->pgoff << PAGE_SHIFT;
-       if (offset >= rp->b_size)
+       if (offset >= rp->b_size) {
+               mutex_unlock(&rp->fetch_lock);
                return VM_FAULT_SIGBUS;
+       }
        chunk_idx = offset / CHUNK_SIZE;
        pageptr = rp->b_vec[chunk_idx].pg;
        get_page(pageptr);
+       mutex_unlock(&rp->fetch_lock);
        vmf->page = pageptr;
        return 0;
 }
index 7c6273bf5bebcfff7b573c23659ef2da219bd29d..06d502b3e91344c809523f67cf75cd364dc2cbed 100644 (file)
@@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
        { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
        { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+       { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
        { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
        { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
        { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -174,6 +175,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
        { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+       { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
        { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
        { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
        { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
index e6127fb21c123f397099dcae0ba5947ca7772061..a7d08ae0adaddc257c6399f0f8c5897fc16e10ff 100644 (file)
@@ -143,6 +143,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_NO_ATA_1X),
 
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+               "Norelsys",
+               "NS1068X",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_IGNORE_UAS),
+
 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
                "JMicron",
index 7b219d9109b41282537363caa976e53c9b675332..ee2bbce24584c1b28e9ea81fff476328635d7707 100644 (file)
@@ -91,7 +91,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
        dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
                udev->devnum, udev->devpath, usb_speed_string(udev->speed));
 
-       pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+       pr_debug("tt hub ttport %d\n", udev->ttport);
 
        dev_dbg(dev, "                    ");
        for (i = 0; i < 16; i++)
@@ -124,12 +124,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
        }
        pr_debug("\n");
 
-       dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
-       dev_dbg(dev,
-               "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
-               &udev->descriptor, udev->config,
-               udev->actconfig, udev->rawdescriptors);
+       dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+               udev->bus->bus_name);
 
        dev_dbg(dev, "have_langid %d, string_langid %d\n",
                udev->have_langid, udev->string_langid);
@@ -237,9 +233,6 @@ void usbip_dump_urb(struct urb *urb)
 
        dev = &urb->dev->dev;
 
-       dev_dbg(dev, "   urb                   :%p\n", urb);
-       dev_dbg(dev, "   dev                   :%p\n", urb->dev);
-
        usbip_dump_usb_device(urb->dev);
 
        dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
@@ -248,11 +241,9 @@ void usbip_dump_urb(struct urb *urb)
 
        dev_dbg(dev, "   status                :%d\n", urb->status);
        dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
-       dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
        dev_dbg(dev, "   transfer_buffer_length:%d\n",
                                                urb->transfer_buffer_length);
        dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
-       dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
 
        if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
                usbip_dump_usb_ctrlrequest(
@@ -262,8 +253,6 @@ void usbip_dump_urb(struct urb *urb)
        dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
        dev_dbg(dev, "   interval              :%d\n", urb->interval);
        dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
-       dev_dbg(dev, "   context               :%p\n", urb->context);
-       dev_dbg(dev, "   complete              :%p\n", urb->complete);
 }
 EXPORT_SYMBOL_GPL(usbip_dump_urb);
 
index df1e309891488eebd0e5439738140c22654cc711..1e8a23d92cb4b86bd5ae7173b6b35ac8e37392f9 100644 (file)
@@ -120,6 +120,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
        urb_p->new = 1;
        urb_p->seqnum = pdu->base.seqnum;
 
+       if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+               /* validate packet size and number of packets */
+               unsigned int maxp, packets, bytes;
+
+               maxp = usb_endpoint_maxp(urb_p->ep->desc);
+               maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+               bytes = pdu->u.cmd_submit.transfer_buffer_length;
+               packets = DIV_ROUND_UP(bytes, maxp);
+
+               if (pdu->u.cmd_submit.number_of_packets < 0 ||
+                   pdu->u.cmd_submit.number_of_packets > packets) {
+                       dev_err(&udc->gadget.dev,
+                               "CMD_SUBMIT: isoc invalid num packets %d\n",
+                               pdu->u.cmd_submit.number_of_packets);
+                       ret = -EMSGSIZE;
+                       goto free_urbp;
+               }
+       }
+
        ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
        if (ret) {
                usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
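
The added validation caps number_of_packets at the largest count that transfer_buffer_length bytes can occupy at the endpoint's effective max packet size (wMaxPacketSize times the high-bandwidth multiplier). The arithmetic, spelled out with illustrative (assumed) values:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int maxp = 1024 * 3;  /* maxp * maxp_mult, assumed values */
            unsigned int bytes = 65536;    /* transfer_buffer_length from the PDU */
            unsigned int packets = DIV_ROUND_UP(bytes, maxp);

            /* Anything above 'packets' (or negative) cannot be a valid count. */
            printf("at most %u isoc packets for %u bytes\n", packets, bytes);
            return 0;
    }
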
index 1440ae0919ec7c5a5f2fc62903faa43c2aeee5ed..3ccb17c3e84060a907afb8c2914e3f69039f753d 100644 (file)
@@ -85,6 +85,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
        memset(&pdu_header, 0, sizeof(pdu_header));
        memset(&msg, 0, sizeof(msg));
 
+       if (urb->actual_length > 0 && !urb->transfer_buffer) {
+               dev_err(&udc->gadget.dev,
+                       "urb: actual_length %d transfer_buffer null\n",
+                       urb->actual_length);
+               return -1;
+       }
+
        if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
                iovnum = 2 + urb->number_of_packets;
        else
@@ -100,8 +107,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 
        /* 1. setup usbip_header */
        setup_ret_submit_pdu(&pdu_header, urb_p);
-       usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-                         pdu_header.base.seqnum, urb);
+       usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+                         pdu_header.base.seqnum);
        usbip_header_correct_endian(&pdu_header, 1);
 
        iov[iovnum].iov_base = &pdu_header;
index 57efbd3b053b37ca43816483dd3364c3c48a761e..bd56653b9bbc2bed8e27c4909b2e8c025c4f1627 100644 (file)
@@ -380,10 +380,8 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
                }
                range = 0;
                while (range < pages) {
-                       if (map->unmap_ops[offset+range].handle == -1) {
-                               range--;
+                       if (map->unmap_ops[offset+range].handle == -1)
                                break;
-                       }
                        range++;
                }
                err = __unmap_grant_pages(map, offset, range);
@@ -1073,8 +1071,10 @@ unlock_out:
 out_unlock_put:
        mutex_unlock(&priv->lock);
 out_put_map:
-       if (use_ptemod)
+       if (use_ptemod) {
                map->vma = NULL;
+               unmap_grant_pages(map, 0, map->count);
+       }
        gntdev_put_map(priv, map);
        return err;
 }
index 94a59ba7d422f4d3b4a53314d99fa4a1367fbcc4..519e94915d1850628ba3a79f7341c29ce3a68f40 100644 (file)
@@ -32,7 +32,6 @@ struct completion {
 #define init_completion(x) __init_completion(x)
 static inline void complete_acquire(struct completion *x) {}
 static inline void complete_release(struct completion *x) {}
-static inline void complete_release_commit(struct completion *x) {}
 
 #define COMPLETION_INITIALIZER(work) \
        { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
index a04ef7c15c6a9dd504a288e23d7da3d6322459e2..7b01bc11c6929b7aaf4906a1587ee1b4e19b2e70 100644 (file)
@@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
+extern ssize_t cpu_show_meltdown(struct device *dev,
+                                struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+                                  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+                                  struct device_attribute *attr, char *buf);
+
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
                                 const struct attribute_group **groups,
index 06097ef304491dc8f9743154c10f92d20165cb47..b511f6d24b42b0c4a2796bc33f494d86b7c7854b 100644 (file)
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
        vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
        vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
 #define VMCOREINFO_SIZE(name) \
        vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
                              (unsigned long)sizeof(name))
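
The new _ARRAY variant exists because `&name` and `name` coincide for a true array but not for a pointer variable, and mem_section may now be a pointer (CONFIG_SPARSEMEM_EXTREME) rather than an array. A small userspace illustration of the distinction:

    #include <stdio.h>

    static long table[4];          /* like mem_section as a true array */
    static long *indirect = table; /* like mem_section as a pointer */

    int main(void)
    {
            /* For the array, both expressions yield the same address... */
            printf("array:   name=%p &name=%p\n", (void *)table, (void *)&table);
            /* ...but for the pointer, &name is the pointer's own location,
             * not what it points to, hence the _ARRAY macro drops the '&'. */
            printf("pointer: name=%p &name=%p\n", (void *)indirect, (void *)&indirect);
            return 0;
    }
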
index 46cb57d5eb1361e521f36208f0fef6b31fd071ea..1b3996ff3f16d10158056e45425b81b5e7c5cbd8 100644 (file)
 # define trace_hardirq_enter()                 \
 do {                                           \
        current->hardirq_context++;             \
-       crossrelease_hist_start(XHLOCK_HARD);   \
 } while (0)
 # define trace_hardirq_exit()                  \
 do {                                           \
        current->hardirq_context--;             \
-       crossrelease_hist_end(XHLOCK_HARD);     \
 } while (0)
 # define lockdep_softirq_enter()               \
 do {                                           \
        current->softirq_context++;             \
-       crossrelease_hist_start(XHLOCK_SOFT);   \
 } while (0)
 # define lockdep_softirq_exit()                        \
 do {                                           \
        current->softirq_context--;             \
-       crossrelease_hist_end(XHLOCK_SOFT);     \
 } while (0)
 # define INIT_TRACE_IRQFLAGS   .softirqs_enabled = 1,
 #else
index 2e75dc34bff5cd3e468571918329793eaf7a0911..3251d9c0d3137ee594a8727f4f817bdb2e2d2350 100644 (file)
@@ -475,8 +475,6 @@ enum xhlock_context_t {
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
        { .name = (_name), .key = (void *)(_key), }
 
-static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
-static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
 static inline void lockdep_invariant_state(bool force) {}
 static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
index 49b4257ce1ea6abc8b188d67e0c48e739859a259..f3075d6c7e8229c999ab650537f1e3b11e1f457b 100644 (file)
@@ -85,7 +85,7 @@ struct netlink_ext_ack {
  * to the lack of an output buffer.)
  */
 #define NL_SET_ERR_MSG(extack, msg) do {               \
-       static const char __msg[] = (msg);              \
+       static const char __msg[] = msg;                \
        struct netlink_ext_ack *__extack = (extack);    \
                                                        \
        if (__extack)                                   \
@@ -101,7 +101,7 @@ struct netlink_ext_ack {
 } while (0)
 
 #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {    \
-       static const char __msg[] = (msg);              \
+       static const char __msg[] = msg;                \
        struct netlink_ext_ack *__extack = (extack);    \
                                                        \
        if (__extack) {                                 \
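
Dropping the parentheses around msg is a build fix, not style: ISO C allows a char array to be initialized only by a string literal (optionally brace-enclosed), and `("text")` is an expression, so some compilers warn about or reject the parenthesized form. A compilable sketch mirroring the fixed macro:

    #include <stdio.h>

    /* Mirrors the fixed macro: the initializer is the bare literal. */
    #define SET_ERR_MSG(var, msg) static const char var[] = msg

    int main(void)
    {
            SET_ERR_MSG(err_msg, "invalid attribute");
            /* static const char bad[] = ("text");  <- parenthesized string
             * constant: accepted by some compilers, warned on or rejected
             * by others. */
            printf("%s (%zu bytes)\n", err_msg, sizeof(err_msg));
            return 0;
    }
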
index 6866df4f31b59da506d56b3db29501a56d813635..d72b2e7dd500ea5224a3afa330f478b3475e0a4e 100644 (file)
@@ -174,6 +174,15 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
  * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL.
  * If ring is never resized, and if the pointer is merely
  * tested, there's no need to take the lock - see e.g.  __ptr_ring_empty.
+ * However, if called outside the lock, and if some other CPU
+ * consumes ring entries at the same time, the value returned
+ * is not guaranteed to be correct.
+ * In this case - to avoid incorrectly detecting the ring
+ * as empty - the CPU consuming the ring entries is responsible
+ * for either consuming all ring entries until the ring is empty,
+ * or synchronizing with some other CPU and causing it to
+ * execute __ptr_ring_peek and/or consume the ring entries
+ * after the synchronization point.
  */
 static inline void *__ptr_ring_peek(struct ptr_ring *r)
 {
@@ -182,10 +191,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r)
        return NULL;
 }
 
-/* Note: callers invoking this in a loop must use a compiler barrier,
- * for example cpu_relax(). Callers must take consumer_lock
- * if the ring is ever resized - see e.g. ptr_ring_empty.
- */
+/* See __ptr_ring_peek above for locking rules. */
 static inline bool __ptr_ring_empty(struct ptr_ring *r)
 {
        return !__ptr_ring_peek(r);
index dc8cd47f883b8bbb5f1e6875c4d9471920729c36..977aabfcdc03bfe85602fce1becd15f579993f3d 100644 (file)
@@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void *pkey, const struct net_device *dev, u32
 
 static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
 {
+       if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+               key = INADDR_ANY;
+
        return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev);
 }
 
index cb4d92b79cd932eda4e178861d8345683b329bdb..fb94a8bd8ab54d6ffe722da48436030be637cf70 100644 (file)
@@ -815,6 +815,8 @@ struct cfg80211_csa_settings {
        u8 count;
 };
 
+#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
+
 /**
  * struct iface_combination_params - input parameters for interface combinations
  *
index 83a3e47d5845b99fa61799a15b29e7247d478c72..becf86aa4ac6bc92e8078247a1c912323b63b131 100644 (file)
@@ -179,6 +179,7 @@ struct Qdisc_ops {
        const struct Qdisc_class_ops    *cl_ops;
        char                    id[IFNAMSIZ];
        int                     priv_size;
+       unsigned int            static_flags;
 
        int                     (*enqueue)(struct sk_buff *skb,
                                           struct Qdisc *sch,
@@ -444,6 +445,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
                               unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                          const struct Qdisc_ops *ops);
+void qdisc_free(struct Qdisc *qdisc);
 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
                                const struct Qdisc_ops *ops, u32 parentid);
 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
index 936cfc5cab7df843439c493655460640c3d4f07b..9185e53a743cea0a19554c5e62cb1841e2d58e70 100644 (file)
@@ -170,7 +170,7 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 
 static inline void tls_err_abort(struct sock *sk)
 {
-       sk->sk_err = -EBADMSG;
+       sk->sk_err = EBADMSG;
        sk->sk_error_report(sk);
 }
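
This one-character fix restores the socket convention: sk->sk_err holds a positive errno value, while return paths use the negated form (hence the matching `ret = -sk->sk_err` changes to tls_sw.c later in this pull). A toy illustration of the two conventions, with stand-in names:

    #include <errno.h>
    #include <stdio.h>

    static int sk_err;              /* stored positive, like sk->sk_err */

    static void err_abort(void)
    {
            sk_err = EBADMSG;       /* positive errno, not -EBADMSG */
    }

    static int sendmsg_stub(void)
    {
            if (sk_err)
                    return -sk_err; /* callers expect negative returns */
            return 0;
    }

    int main(void)
    {
            err_abort();
            printf("sendmsg -> %d (EBADMSG = %d)\n", sendmsg_stub(), EBADMSG);
            return 0;
    }
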
 
index 4265d7f9e1f22da2a3b352cf13d2bf930f530c5d..dcfab5e3b55c1f909bbea35196f92a17665ce16d 100644 (file)
@@ -363,7 +363,6 @@ enum ovs_tunnel_key_attr {
        OVS_TUNNEL_KEY_ATTR_IPV6_SRC,           /* struct in6_addr src IPv6 address. */
        OVS_TUNNEL_KEY_ATTR_IPV6_DST,           /* struct in6_addr dst IPv6 address. */
        OVS_TUNNEL_KEY_ATTR_PAD,
-       OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,        /* be32 ERSPAN index. */
        __OVS_TUNNEL_KEY_ATTR_MAX
 };
 
index 19a6b845d834ba33b4a29be731b79cf63401861f..a9a2e2c86671a18c02c49643151204dd3358157b 100644 (file)
@@ -461,6 +461,7 @@ endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
        bool "CPU isolation"
+       depends on SMP || COMPILE_TEST
        default y
        help
          Make sure that CPUs running critical tasks are not disturbed by
index b3663896278ed71f854963024caa6549b6a57935..4f63597c824dfa81542a38268f9ec339f7fe0c47 100644 (file)
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #ifdef CONFIG_SPARSEMEM
-       VMCOREINFO_SYMBOL(mem_section);
+       VMCOREINFO_SYMBOL_ARRAY(mem_section);
        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
        VMCOREINFO_STRUCT_SIZE(mem_section);
        VMCOREINFO_OFFSET(mem_section, section_mem_map);
index 2ddaec40956f7cf1356e17404e758b9a11924c8d..0926aef10dadc26d73959b5d7a736e247320a7de 100644 (file)
@@ -34,11 +34,6 @@ void complete(struct completion *x)
 
        spin_lock_irqsave(&x->wait.lock, flags);
 
-       /*
-        * Perform commit of crossrelease here.
-        */
-       complete_release_commit(x);
-
        if (x->done != UINT_MAX)
                x->done++;
        __wake_up_locked(&x->wait, TASK_NORMAL, 1);
index dd7908743dab696facd9dd32f4399ee952228151..9bcbacba82a8115ddceda7fb26097a23c50d44f5 100644 (file)
@@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
                rcu_read_unlock();
        }
        if (!fallback) {
+               preempt_disable();
                smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+               preempt_enable();
                free_cpumask_var(tmpmask);
        }
        cpus_read_unlock();
index 904c952ac3833bdfd0e017dff437d26f4c545e3d..f54dc62b599ccbdd5d8246c2a370bd3b01b7960a 100644 (file)
@@ -355,7 +355,7 @@ config PROFILE_ANNOTATED_BRANCHES
          on if you need to profile the system's use of these macros.
 
 config PROFILE_ALL_BRANCHES
-       bool "Profile all if conditionals"
+       bool "Profile all if conditionals" if !FORTIFY_SOURCE
        select TRACE_BRANCH_PROFILING
        help
          This tracer profiles all branch conditions. Every if ()
index 9ab18995ff1ebeea0e862f48b43f64924c87e127..0cddf60186da0b72b0db7b57fd115030c0ee6126 100644 (file)
@@ -2534,29 +2534,59 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The lock and unlock are done within a preempt disable section.
  * The current_context per_cpu variable can only be modified
  * by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. There are four
- * different contexts that we need to consider.
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing),
+ * we use a bitmask trick.
  *
- *  Normal context.
- *  SoftIRQ context
- *  IRQ context
- *  NMI context
+ *  bit 0 =  NMI context
+ *  bit 1 =  IRQ context
+ *  bit 2 =  SoftIRQ context
+ *  bit 3 =  normal context.
  *
- * If for some reason the ring buffer starts to recurse, we
- * only allow that to happen at most 4 times (one for each
- * context). If it happens 5 times, then we consider this a
- * recusive loop and do not let it go further.
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
  */
 
 static __always_inline int
 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       if (cpu_buffer->current_context >= 4)
+       unsigned int val = cpu_buffer->current_context;
+       unsigned long pc = preempt_count();
+       int bit;
+
+       if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+               bit = RB_CTX_NORMAL;
+       else
+               bit = pc & NMI_MASK ? RB_CTX_NMI :
+                       pc & HARDIRQ_MASK ? RB_CTX_IRQ :
+                       pc & SOFTIRQ_OFFSET ? 2 : RB_CTX_SOFTIRQ;
+
+       if (unlikely(val & (1 << bit)))
                return 1;
 
-       cpu_buffer->current_context++;
-       /* Interrupts must see this update */
-       barrier();
+       val |= (1 << bit);
+       cpu_buffer->current_context = val;
 
        return 0;
 }
@@ -2564,9 +2594,7 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-       /* Don't let the dec leak out */
-       barrier();
-       cpu_buffer->current_context--;
+       cpu_buffer->current_context &= cpu_buffer->current_context - 1;
 }
 
 /**
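
The unlock relies on contexts nesting in bit order: the most recently set bit is always the lowest set bit, so `val & (val - 1)` clears exactly it. A standalone demo of the trick, using the same bit assignments as the comment above:

    #include <stdio.h>

    enum { CTX_NMI, CTX_IRQ, CTX_SOFTIRQ, CTX_NORMAL };

    int main(void)
    {
            unsigned int val = 0;

            /* "Lock": normal context, then an IRQ nests on top. */
            val |= 1 << CTX_NORMAL;  /* 1000 */
            val |= 1 << CTX_IRQ;     /* 1010 */
            printf("locked:   %x\n", val);

            /* "Unlock" twice: each step clears the lowest set bit,
             * i.e. the innermost (most recently entered) context. */
            val &= val - 1;          /* 1010 -> 1000, IRQ released */
            printf("irq out:  %x\n", val);
            val &= val - 1;          /* 1000 -> 0000, normal released */
            printf("norm out: %x\n", val);
            return 0;
    }
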
index d73c14294f3a61c2385741b447aa31d203a2bc72..f656ca27f6c20596322ce2aea248b4b04085bb29 100644 (file)
 /* GFP bitmask for kmemleak internal allocations */
 #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                 __GFP_NORETRY | __GFP_NOMEMALLOC | \
-                                __GFP_NOWARN)
+                                __GFP_NOWARN | __GFP_NOFAIL)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
index 325c56043007d89886203626a2a139f1ab8fc52a..086a4abdfa7cfcfad4934eb52613f4fe57723eb2 100644 (file)
@@ -543,3 +543,7 @@ static void p9_trans_xen_exit(void)
        return xenbus_unregister_driver(&xen_9pfs_front_driver);
 }
 module_exit(p9_trans_xen_exit);
+
+MODULE_AUTHOR("Stefano Stabellini <stefano@aporeto.com>");
+MODULE_DESCRIPTION("Xen Transport for 9P");
+MODULE_LICENSE("GPL");
index 43ba91c440bcd65bd9f24258a28d01492cf6ac13..fc6615d5916524c446a9f4f952f2f8b7b5b67e16 100644 (file)
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
                        break;
 
                case L2CAP_CONF_EFS:
-                       remote_efs = 1;
-                       if (olen == sizeof(efs))
+                       if (olen == sizeof(efs)) {
+                               remote_efs = 1;
                                memcpy(&efs, (void *) val, olen);
+                       }
                        break;
 
                case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
                        break;
 
                case L2CAP_CONF_EFS:
-                       if (olen == sizeof(efs))
+                       if (olen == sizeof(efs)) {
                                memcpy(&efs, (void *)val, olen);
 
-                       if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
-                           efs.stype != L2CAP_SERV_NOTRAFIC &&
-                           efs.stype != chan->local_stype)
-                               return -ECONNREFUSED;
+                               if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+                                   efs.stype != L2CAP_SERV_NOTRAFIC &&
+                                   efs.stype != chan->local_stype)
+                                       return -ECONNREFUSED;
 
-                       l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-                                          (unsigned long) &efs, endptr - ptr);
+                               l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+                                                  (unsigned long) &efs, endptr - ptr);
+                       }
                        break;
 
                case L2CAP_CONF_FCS:
index d1f5fe986edda5ff886575be0eea0b361e2be7ff..7f831711b6e03d63fe12f3a7dfec85f18f531c15 100644 (file)
@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 
-       hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
+       hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 
        if (n->parms->dead) {
                rc = ERR_PTR(-EINVAL);
@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
             n1 != NULL;
             n1 = rcu_dereference_protected(n1->next,
                        lockdep_is_held(&tbl->lock))) {
-               if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
+               if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
                        if (want_ref)
                                neigh_hold(n1);
                        rc = n1;
index a8d7c5a9fb0523db487e8e51a43dadc192046b1f..6c231b43974d93c213f3e51cfd40a1dceddf48ee 100644 (file)
@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neighbour *neigh, const void *pkey)
 
 static int arp_constructor(struct neighbour *neigh)
 {
-       __be32 addr = *(__be32 *)neigh->primary_key;
+       __be32 addr;
        struct net_device *dev = neigh->dev;
        struct in_device *in_dev;
        struct neigh_parms *parms;
+       u32 inaddr_any = INADDR_ANY;
 
+       if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+               memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len);
+
+       addr = *(__be32 *)neigh->primary_key;
        rcu_read_lock();
        in_dev = __in_dev_get_rcu(dev);
        if (!in_dev) {
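
Together with the lookup change in arp.h above, this keeps hash and compare consistent: on loopback and point-to-point devices every address is forced to the same key (INADDR_ANY) both when a neighbour is created and when it is looked up, and __neigh_create now hashes n->primary_key after the constructor may have rewritten it. A sketch of the normalization step, with flag values mirrored from linux/if.h:

    #include <arpa/inet.h>
    #include <stdio.h>

    #define IFF_LOOPBACK    0x8   /* as in linux/if.h */
    #define IFF_POINTOPOINT 0x10

    /* Collapse the key exactly where the kernel does: loopback and
     * point-to-point devices share a single neighbour entry. */
    static unsigned int arp_key(unsigned int dev_flags, unsigned int key)
    {
            if (dev_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
                    key = INADDR_ANY;
            return key;
    }

    int main(void)
    {
            printf("lo:  %#x\n", arp_key(IFF_LOOPBACK, 0x0100007f));
            printf("eth: %#x\n", arp_key(0, 0x0100007f));
            return 0;
    }
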
index 43b69af242e18d640db1bff451d2ee229fc5e08f..4e153b23bceca1b8f6071febf4bb0df8cca7f12f 100644 (file)
@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (err == 0 && rt->dst.error)
                        err = -rt->dst.error;
        } else {
+               fl4.flowi4_iif = LOOPBACK_IFINDEX;
                rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
                err = 0;
                if (IS_ERR(rt))
index 688ba5f7516b37c87b879036dce781bdcfa01739..4f7d8de56611472575862d50b9fd18f677d36e43 100644 (file)
@@ -1206,14 +1206,16 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-                     rt->dst.dev->mtu : dst_mtu(&rt->dst);
+                     READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
-                     rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+                     READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
+       if (mtu < IPV6_MIN_MTU)
+               return -EINVAL;
        cork->base.fragsize = mtu;
        if (dst_allfrag(rt->dst.path))
                cork->base.flags |= IPCORK_ALLFRAG;
@@ -1733,6 +1735,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
        cork.base.flags = 0;
        cork.base.addr = 0;
        cork.base.opt = NULL;
+       cork.base.dst = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
        if (err) {
index 79cc1bf36e4af7d2c70575e56203a482ba2dca97..47ef2d8683d6cfd87627d5b3b4efa4426eb56784 100644 (file)
@@ -2384,7 +2384,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
                                                   struct nlmsghdr *,
                                                   struct netlink_ext_ack *))
 {
-       struct netlink_ext_ack extack = {};
+       struct netlink_ext_ack extack;
        struct nlmsghdr *nlh;
        int err;
 
@@ -2405,6 +2405,7 @@ int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
                        goto ack;
 
+               memset(&extack, 0, sizeof(extack));
                err = cb(skb, nlh, &extack);
                if (err == -EINTR)
                        goto skip;
index 624ea74353dd3ba403ccedc822f1c9574982e709..f143908b651dffc38cdbe9ef1d5235015ff08b6a 100644 (file)
@@ -49,7 +49,6 @@
 #include <net/mpls.h>
 #include <net/vxlan.h>
 #include <net/tun_proto.h>
-#include <net/erspan.h>
 
 #include "flow_netlink.h"
 
@@ -334,8 +333,7 @@ size_t ovs_tun_key_attr_size(void)
                 * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
                 */
                + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
-               + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_DST */
-               + nla_total_size(4);   /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
+               + nla_total_size(2);   /* OVS_TUNNEL_KEY_ATTR_TP_DST */
 }
 
 static size_t ovs_nsh_key_attr_size(void)
@@ -402,7 +400,6 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
                                                .next = ovs_vxlan_ext_key_lens },
        [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
        [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
-       [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = sizeof(u32) },
 };
 
 static const struct ovs_len_tbl
@@ -634,33 +631,6 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
        return 0;
 }
 
-static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
-                                     struct sw_flow_match *match, bool is_mask,
-                                     bool log)
-{
-       unsigned long opt_key_offset;
-       struct erspan_metadata opts;
-
-       BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
-
-       memset(&opts, 0, sizeof(opts));
-       opts.index = nla_get_be32(attr);
-
-       /* Index has only 20-bit */
-       if (ntohl(opts.index) & ~INDEX_MASK) {
-               OVS_NLERR(log, "ERSPAN index number %x too large.",
-                         ntohl(opts.index));
-               return -EINVAL;
-       }
-
-       SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
-       opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
-       SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
-                                 is_mask);
-
-       return 0;
-}
-
 static int ip_tun_from_nlattr(const struct nlattr *attr,
                              struct sw_flow_match *match, bool is_mask,
                              bool log)
@@ -768,19 +738,6 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
                        break;
                case OVS_TUNNEL_KEY_ATTR_PAD:
                        break;
-               case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
-                       if (opts_type) {
-                               OVS_NLERR(log, "Multiple metadata blocks provided");
-                               return -EINVAL;
-                       }
-
-                       err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
-                       if (err)
-                               return err;
-
-                       tun_flags |= TUNNEL_ERSPAN_OPT;
-                       opts_type = type;
-                       break;
                default:
                        OVS_NLERR(log, "Unknown IP tunnel attribute %d",
                                  type);
@@ -905,10 +862,6 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
                else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
                         vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
                        return -EMSGSIZE;
-               else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
-                        nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
-                                     ((struct erspan_metadata *)tun_opts)->index))
-                       return -EMSGSIZE;
        }
 
        return 0;
@@ -2533,8 +2486,6 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
                        break;
                case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
                        break;
-               case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
-                       break;
                }
        };
 
index 8d78e7f4ecc33082517aaab5767a30c119f49dc0..a62586e2dbdb91e4a04226a410035e0cde94b91d 100644 (file)
@@ -183,10 +183,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
        return 0;
 }
 
+static u32 cls_bpf_flags(u32 flags)
+{
+       return flags & CLS_BPF_SUPPORTED_GEN_FLAGS;
+}
+
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
 {
-       if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+       if (prog && oldprog &&
+           cls_bpf_flags(prog->gen_flags) !=
+           cls_bpf_flags(oldprog->gen_flags))
                return -EINVAL;
 
        if (prog && tc_skip_hw(prog->gen_flags))
index 0f1eab99ff4edb6e7e27f4b4b34552b5ee996cbf..52529b7f8d963ef952b71c086c52ab1588b1d106 100644 (file)
@@ -1063,17 +1063,6 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
        }
 
        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
-               if (qdisc_is_percpu_stats(sch)) {
-                       sch->cpu_bstats =
-                               netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
-                       if (!sch->cpu_bstats)
-                               goto err_out4;
-
-                       sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
-                       if (!sch->cpu_qstats)
-                               goto err_out4;
-               }
-
                if (tca[TCA_STAB]) {
                        stab = qdisc_get_stab(tca[TCA_STAB]);
                        if (IS_ERR(stab)) {
@@ -1115,7 +1104,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
                ops->destroy(sch);
 err_out3:
        dev_put(dev);
-       kfree((char *) sch - sch->padded);
+       qdisc_free(sch);
 err_out2:
        module_put(ops->owner);
 err_out:
@@ -1123,8 +1112,6 @@ err_out:
        return NULL;
 
 err_out4:
-       free_percpu(sch->cpu_bstats);
-       free_percpu(sch->cpu_qstats);
        /*
         * Any broken qdiscs that would require a ops->reset() here?
         * The qdisc was never in action so it shouldn't be necessary.
index 661c7144b53af048b3a65484777910e2d60f25aa..cac003fddf3e5d54e8af29af4f69b80bbd7dc331 100644 (file)
@@ -633,6 +633,19 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        qdisc_skb_head_init(&sch->q);
        spin_lock_init(&sch->q.lock);
 
+       if (ops->static_flags & TCQ_F_CPUSTATS) {
+               sch->cpu_bstats =
+                       netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+               if (!sch->cpu_bstats)
+                       goto errout1;
+
+               sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+               if (!sch->cpu_qstats) {
+                       free_percpu(sch->cpu_bstats);
+                       goto errout1;
+               }
+       }
+
        spin_lock_init(&sch->busylock);
        lockdep_set_class(&sch->busylock,
                          dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
@@ -642,6 +655,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
                          dev->qdisc_running_key ?: &qdisc_running_key);
 
        sch->ops = ops;
+       sch->flags = ops->static_flags;
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev_queue = dev_queue;
@@ -649,6 +663,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        refcount_set(&sch->refcnt, 1);
 
        return sch;
+errout1:
+       kfree(p);
 errout:
        return ERR_PTR(err);
 }
@@ -698,7 +714,7 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-static void qdisc_free(struct Qdisc *qdisc)
+void qdisc_free(struct Qdisc *qdisc)
 {
        if (qdisc_is_percpu_stats(qdisc)) {
                free_percpu(qdisc->cpu_bstats);
index fc1286f499c1462ab29c5054f734237788974e0e..003e1b063447d2504ea886f1fd11dce8e446b38a 100644 (file)
@@ -66,7 +66,6 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
 {
        struct ingress_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
-       int err;
 
        net_inc_ingress_queue();
 
@@ -76,13 +75,7 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
        q->block_info.chain_head_change = clsact_chain_head_change;
        q->block_info.chain_head_change_priv = &q->miniqp;
 
-       err = tcf_block_get_ext(&q->block, sch, &q->block_info);
-       if (err)
-               return err;
-
-       sch->flags |= TCQ_F_CPUSTATS;
-
-       return 0;
+       return tcf_block_get_ext(&q->block, sch, &q->block_info);
 }
 
 static void ingress_destroy(struct Qdisc *sch)
@@ -121,6 +114,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
        .cl_ops         =       &ingress_class_ops,
        .id             =       "ingress",
        .priv_size      =       sizeof(struct ingress_sched_data),
+       .static_flags   =       TCQ_F_CPUSTATS,
        .init           =       ingress_init,
        .destroy        =       ingress_destroy,
        .dump           =       ingress_dump,
@@ -192,13 +186,7 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
        q->egress_block_info.chain_head_change = clsact_chain_head_change;
        q->egress_block_info.chain_head_change_priv = &q->miniqp_egress;
 
-       err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
-       if (err)
-               return err;
-
-       sch->flags |= TCQ_F_CPUSTATS;
-
-       return 0;
+       return tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info);
 }
 
 static void clsact_destroy(struct Qdisc *sch)
@@ -225,6 +213,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
        .cl_ops         =       &clsact_class_ops,
        .id             =       "clsact",
        .priv_size      =       sizeof(struct clsact_sched_data),
+       .static_flags   =       TCQ_F_CPUSTATS,
        .init           =       clsact_init,
        .destroy        =       clsact_destroy,
        .dump           =       ingress_dump,
index 3b18085e3b10253f3f81be7a6747b50ef9357db2..5d4c15bf66d26219415596598a1f72d29b63a798 100644 (file)
@@ -826,6 +826,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
        case AF_INET:
                if (!__ipv6_only_sock(sctp_opt2sk(sp)))
                        return 1;
+               /* fallthru */
        default:
                return 0;
        }
index 7d67feeeffc1e758ae4be4ef1ddaea23276d1f5e..c4ec99b2015002b273071e6fb1ec3c59c9f61154 100644 (file)
@@ -918,9 +918,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
                        break;
 
                case SCTP_CID_ABORT:
-                       if (sctp_test_T_bit(chunk)) {
+                       if (sctp_test_T_bit(chunk))
                                packet->vtag = asoc->c.my_vtag;
-                       }
+                       /* fallthru */
                /* The following chunks are "response" chunks, i.e.
                 * they are generated in response to something we
                 * received.  If we are sending these, then we can
index 9b01e994f66108a12abc31f452482f1de8749d84..039fcb618c34985f5b2a337d6a0876bca24b28da 100644 (file)
@@ -85,7 +85,7 @@
 static int sctp_writeable(struct sock *sk);
 static void sctp_wfree(struct sk_buff *skb);
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-                               size_t msg_len, struct sock **orig_sk);
+                               size_t msg_len);
 static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
 static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
 static int sctp_wait_for_accept(struct sock *sk, long timeo);
@@ -335,16 +335,14 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
        if (len < sizeof (struct sockaddr))
                return NULL;
 
+       if (!opt->pf->af_supported(addr->sa.sa_family, opt))
+               return NULL;
+
        /* V4 mapped address are really of AF_INET family */
        if (addr->sa.sa_family == AF_INET6 &&
-           ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
-               if (!opt->pf->af_supported(AF_INET, opt))
-                       return NULL;
-       } else {
-               /* Does this PF support this AF? */
-               if (!opt->pf->af_supported(addr->sa.sa_family, opt))
-                       return NULL;
-       }
+           ipv6_addr_v4mapped(&addr->v6.sin6_addr) &&
+           !opt->pf->af_supported(AF_INET, opt))
+               return NULL;
 
        /* If we get this far, af is valid. */
        af = sctp_get_af_specific(addr->sa.sa_family);
@@ -1883,8 +1881,14 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
                 */
                if (sinit) {
                        if (sinit->sinit_num_ostreams) {
-                               asoc->c.sinit_num_ostreams =
-                                       sinit->sinit_num_ostreams;
+                               __u16 outcnt = sinit->sinit_num_ostreams;
+
+                               asoc->c.sinit_num_ostreams = outcnt;
+                               /* outcnt has been changed, so re-init stream */
+                               err = sctp_stream_init(&asoc->stream, outcnt, 0,
+                                                      GFP_KERNEL);
+                               if (err)
+                                       goto out_free;
                        }
                        if (sinit->sinit_max_instreams) {
                                asoc->c.sinit_max_instreams =
@@ -1971,7 +1975,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
        timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
        if (!sctp_wspace(asoc)) {
                /* sk can be changed by peel off when waiting for buf. */
-               err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk);
+               err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
                if (err) {
                        if (err == -ESRCH) {
                                /* asoc is already dead. */
@@ -8016,12 +8020,12 @@ void sctp_sock_rfree(struct sk_buff *skb)
 
 /* Helper function to wait for space in the sndbuf.  */
 static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
-                               size_t msg_len, struct sock **orig_sk)
+                               size_t msg_len)
 {
        struct sock *sk = asoc->base.sk;
-       int err = 0;
        long current_timeo = *timeo_p;
        DEFINE_WAIT(wait);
+       int err = 0;
 
        pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc,
                 *timeo_p, msg_len);
@@ -8050,17 +8054,13 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
                release_sock(sk);
                current_timeo = schedule_timeout(current_timeo);
                lock_sock(sk);
-               if (sk != asoc->base.sk) {
-                       release_sock(sk);
-                       sk = asoc->base.sk;
-                       lock_sock(sk);
-               }
+               if (sk != asoc->base.sk)
+                       goto do_error;
 
                *timeo_p = current_timeo;
        }
 
 out:
-       *orig_sk = sk;
        finish_wait(&asoc->wait, &wait);
 
        /* Release the association's refcnt.  */
index 507017fe0f1b5263f819411388240ef64cab935d..9036d8756e731ab02d75a520c489728cee621faf 100644 (file)
@@ -1880,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
 
        if (strcmp(name, tipc_bclink_name) == 0) {
                err = tipc_nl_add_bc_link(net, &msg);
-               if (err) {
-                       nlmsg_free(msg.skb);
-                       return err;
-               }
+               if (err)
+                       goto err_free;
        } else {
                int bearer_id;
                struct tipc_node *node;
                struct tipc_link *link;
 
                node = tipc_node_find_by_name(net, name, &bearer_id);
-               if (!node)
-                       return -EINVAL;
+               if (!node) {
+                       err = -EINVAL;
+                       goto err_free;
+               }
 
                tipc_node_read_lock(node);
                link = node->links[bearer_id].link;
                if (!link) {
                        tipc_node_read_unlock(node);
-                       nlmsg_free(msg.skb);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto err_free;
                }
 
                err = __tipc_nl_add_link(net, &msg, link, 0);
                tipc_node_read_unlock(node);
-               if (err) {
-                       nlmsg_free(msg.skb);
-                       return err;
-               }
+               if (err)
+                       goto err_free;
        }
 
        return genlmsg_reply(msg.skb, info);
+
+err_free:
+       nlmsg_free(msg.skb);
+       return err;
 }
 
 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
index e07ee3ae002300932f8fd41a907cee2fc042738f..736719c8314e83e0c0fca2dabed30a1535fbfdcb 100644 (file)
@@ -367,8 +367,10 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
 
        crypto_info = &ctx->crypto_send;
        /* Currently we don't support set crypto info more than one time */
-       if (TLS_CRYPTO_INFO_READY(crypto_info))
+       if (TLS_CRYPTO_INFO_READY(crypto_info)) {
+               rc = -EBUSY;
                goto out;
+       }
 
        rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info));
        if (rc) {
@@ -386,7 +388,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
        case TLS_CIPHER_AES_GCM_128: {
                if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
                        rc = -EINVAL;
-                       goto out;
+                       goto err_crypto_info;
                }
                rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info),
                                    optlen - sizeof(*crypto_info));
@@ -398,7 +400,7 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval,
        }
        default:
                rc = -EINVAL;
-               goto out;
+               goto err_crypto_info;
        }
 
        /* currently SW is default, we will have ethtool in future */
@@ -454,6 +456,15 @@ static int tls_init(struct sock *sk)
        struct tls_context *ctx;
        int rc = 0;
 
+       /* The TLS ulp is currently supported only for TCP sockets
+        * in ESTABLISHED state.
+        * Supporting sockets in LISTEN state will require us
+        * to modify the accept implementation to clone rather than
+        * share the ulp context.
+        */
+       if (sk->sk_state != TCP_ESTABLISHED)
+               return -ENOTSUPP;
+
        /* allocate tls context */
        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx) {
index 73d19210dd497193ff545ee15305113e6090f731..61f394d369bf86d69c44b84950632cd5c3519905 100644 (file)
@@ -391,7 +391,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 
        while (msg_data_left(msg)) {
                if (sk->sk_err) {
-                       ret = sk->sk_err;
+                       ret = -sk->sk_err;
                        goto send_end;
                }
 
@@ -544,7 +544,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
                size_t copy, required_size;
 
                if (sk->sk_err) {
-                       ret = sk->sk_err;
+                       ret = -sk->sk_err;
                        goto sendpage_end;
                }
 
@@ -681,18 +681,17 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
        }
        default:
                rc = -EINVAL;
-               goto out;
+               goto free_priv;
        }
 
        ctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
        ctx->tag_size = tag_size;
        ctx->overhead_size = ctx->prepend_size + ctx->tag_size;
        ctx->iv_size = iv_size;
-       ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-                         GFP_KERNEL);
+       ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL);
        if (!ctx->iv) {
                rc = -ENOMEM;
-               goto out;
+               goto free_priv;
        }
        memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
        memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
@@ -740,7 +739,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx)
 
        rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size);
        if (!rc)
-               goto out;
+               return 0;
 
 free_aead:
        crypto_free_aead(sw_ctx->aead_send);
@@ -751,6 +750,9 @@ free_rec_seq:
 free_iv:
        kfree(ctx->iv);
        ctx->iv = NULL;
+free_priv:
+       kfree(ctx->priv_ctx);
+       ctx->priv_ctx = NULL;
 out:
        return rc;
 }
index fdde0d98fde16296daf841c2ac102fc20ca2cd5c..a6f3cac8c640e4cdb0eb4fb9d3c77bf3fd352576 100644 (file)
@@ -439,6 +439,8 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
                if (rv)
                        goto use_default_name;
        } else {
+               int rv;
+
 use_default_name:
                /* NOTE:  This is *probably* safe w/out holding rtnl because of
                 * the restrictions on phy names.  Probably this call could
@@ -446,7 +448,11 @@ use_default_name:
                 * phyX.  But, might should add some locking and check return
                 * value, and use a different name if this one exists?
                 */
-               dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+               rv = dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx);
+               if (rv < 0) {
+                       kfree(rdev);
+                       return NULL;
+               }
        }
 
        INIT_LIST_HEAD(&rdev->wiphy.wdev_list);
index d2f7e8b8a097c00c0cf6da78f8a1e4d55600296b..eaff636169c220f7ca0218911cdbe4b711b150a0 100644 (file)
@@ -507,8 +507,6 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
                       struct wireless_dev *wdev);
 
-#define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
-
 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
 #define CFG80211_DEV_WARN_ON(cond)     WARN_ON(cond)
 #else
index 2b3dbcd40e46390debf84f4a127b23f185e76b39..ed87a97fcb0b040720addf4ecfa2ae6709573f41 100644 (file)
@@ -2618,12 +2618,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
                const u8 *ssid_ie;
                if (!wdev->current_bss)
                        break;
+               rcu_read_lock();
                ssid_ie = ieee80211_bss_get_ie(&wdev->current_bss->pub,
                                               WLAN_EID_SSID);
-               if (!ssid_ie)
-                       break;
-               if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-                       goto nla_put_failure_locked;
+               if (ssid_ie &&
+                   nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
+                       goto nla_put_failure_rcu_locked;
+               rcu_read_unlock();
                break;
                }
        default:
@@ -2635,6 +2636,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        genlmsg_end(msg, hdr);
        return 0;
 
+ nla_put_failure_rcu_locked:
+       rcu_read_unlock();
  nla_put_failure_locked:
        wdev_unlock(wdev);
  nla_put_failure:
index 78e71b0390be90bc16655d380fa6869391c52729..7b42f0bacfd843481adb1648c24a5f87c4395b1a 100644 (file)
@@ -1769,8 +1769,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
        if (wiphy->regulatory_flags & REGULATORY_DISABLE_BEACON_HINTS)
                return;
 
-       chan_before.center_freq = chan->center_freq;
-       chan_before.flags = chan->flags;
+       chan_before = *chan;
 
        if (chan->flags & IEEE80211_CHAN_NO_IR) {
                chan->flags &= ~IEEE80211_CHAN_NO_IR;
index 86dc07a01b4398a3e72e79584a2c447bb5643780..e7836b47f060889a036405e7cf70d04c4177adf9 100644 (file)
@@ -1,4 +1,3 @@
-*.hash.c
 *.lex.c
 *.tab.c
 *.tab.h
index cbf4996dd9c1045ecc7ffa66a9ec79edd99b1038..8cee597d33a594d3d7380390aa2b697218034578 100644 (file)
@@ -893,7 +893,10 @@ static enum string_value_kind expr_parse_string(const char *str,
        switch (type) {
        case S_BOOLEAN:
        case S_TRISTATE:
-               return k_string;
+               val->s = !strcmp(str, "n") ? 0 :
+                        !strcmp(str, "m") ? 1 :
+                        !strcmp(str, "y") ? 2 : -1;
+               return k_signed;
        case S_INT:
                val->s = strtoll(str, &tail, 10);
                kind = k_signed;
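
The change makes boolean/tristate comparisons numeric instead of string-based, so that y > m > n orders the way users expect. A standalone version of the same mapping:

    #include <stdio.h>
    #include <string.h>

    /* Same nested ternary as the patch: n=0, m=1, y=2, anything else -1. */
    static long long tristate_value(const char *str)
    {
            return !strcmp(str, "n") ? 0 :
                   !strcmp(str, "m") ? 1 :
                   !strcmp(str, "y") ? 2 : -1;
    }

    int main(void)
    {
            printf("y > m: %d\n", tristate_value("y") > tristate_value("m"));
            printf("m > n: %d\n", tristate_value("m") > tristate_value("n"));
            return 0;
    }
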
index 3d4debd0257e2544996ac97c90831bde3a854e7d..b0cb9a5f94480d2c5aaf290b31fd15e6c5930ec2 100644 (file)
@@ -63,7 +63,7 @@ config PAGE_TABLE_ISOLATION
          ensuring that the majority of kernel addresses are not mapped
          into userspace.
 
-         See Documentation/x86/pagetable-isolation.txt for more details.
+         See Documentation/x86/pti.txt for more details.
 
 config SECURITY_INFINIBAND
        bool "Infiniband Security Hooks"
index 04ba9d0718ea590b7c5033cfc4b952c701acb54d..6a54d2ffa84012fc7fd27aef18c7fa0410e050e3 100644 (file)
@@ -330,10 +330,7 @@ static struct aa_profile *__attach_match(const char *name,
                        continue;
 
                if (profile->xmatch) {
-                       if (profile->xmatch_len == len) {
-                               conflict = true;
-                               continue;
-                       } else if (profile->xmatch_len > len) {
+                       if (profile->xmatch_len >= len) {
                                unsigned int state;
                                u32 perm;
 
@@ -342,6 +339,10 @@ static struct aa_profile *__attach_match(const char *name,
                                perm = dfa_user_allow(profile->xmatch, state);
                                /* any accepting state means a valid match. */
                                if (perm & MAY_EXEC) {
+                                       if (profile->xmatch_len == len) {
+                                               conflict = true;
+                                               continue;
+                                       }
                                        candidate = profile;
                                        len = profile->xmatch_len;
                                        conflict = false;
index 2b27bb79aec4421665aaa6f2a2490868227bed0f..d7b7e711516010806d194ed39b2a24e1a9f9fd09 100644 (file)
@@ -133,6 +133,9 @@ extern struct aa_perms allperms;
 #define xcheck_labels_profiles(L1, L2, FN, args...)            \
        xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
 
+#define xcheck_labels(L1, L2, P, FN1, FN2)                     \
+       xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
+
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
 void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
index 7ca0032e7ba96ef374aefe98d47acb919f3f81d3..b40678f3c1d5a4d62eeb1255f26e2a73aabd6767 100644 (file)
@@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
                        FLAGS_NONE, GFP_ATOMIC);
 }
 
+/* assumes check for PROFILE_MEDIATES is already done */
 /* TODO: conditionals */
 static int profile_ptrace_perm(struct aa_profile *profile,
-                              struct aa_profile *peer, u32 request,
-                              struct common_audit_data *sa)
+                            struct aa_label *peer, u32 request,
+                            struct common_audit_data *sa)
 {
        struct aa_perms perms = { };
 
-       /* need because of peer in cross check */
-       if (profile_unconfined(profile) ||
-           !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
-               return 0;
-
-       aad(sa)->peer = &peer->label;
-       aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
+       aad(sa)->peer = peer;
+       aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
                               &perms);
        aa_apply_modes_to_perms(profile, &perms);
        return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
 
-static int cross_ptrace_perm(struct aa_profile *tracer,
-                            struct aa_profile *tracee, u32 request,
-                            struct common_audit_data *sa)
+static int profile_tracee_perm(struct aa_profile *tracee,
+                              struct aa_label *tracer, u32 request,
+                              struct common_audit_data *sa)
 {
+       if (profile_unconfined(tracee) || unconfined(tracer) ||
+           !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+               return 0;
+
+       return profile_ptrace_perm(tracee, tracer, request, sa);
+}
+
+static int profile_tracer_perm(struct aa_profile *tracer,
+                              struct aa_label *tracee, u32 request,
+                              struct common_audit_data *sa)
+{
+       if (profile_unconfined(tracer))
+               return 0;
+
        if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
-               return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
-                             profile_ptrace_perm(tracee, tracer,
-                                                 request << PTRACE_PERM_SHIFT,
-                                                 sa));
-       /* policy uses the old style capability check for ptrace */
-       if (profile_unconfined(tracer) || tracer == tracee)
+               return profile_ptrace_perm(tracer, tracee, request, sa);
+
+       /* profile uses the old style capability check for ptrace */
+       if (&tracer->label == tracee)
                return 0;
 
        aad(sa)->label = &tracer->label;
-       aad(sa)->peer = &tracee->label;
+       aad(sa)->peer = tracee;
        aad(sa)->request = 0;
        aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
 
@@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
                  u32 request)
 {
+       struct aa_profile *profile;
+       u32 xrequest = request << PTRACE_PERM_SHIFT;
        DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
 
-       return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
-                                     request, &sa);
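+       /*
+        * Cross check both directions: the tracer's profiles must permit
+        * tracing the tracee, and the tracee's profiles must permit being
+        * traced (the same request bits shifted by PTRACE_PERM_SHIFT).
+        */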
+       return xcheck_labels(tracer, tracee, profile,
+                       profile_tracer_perm(profile, tracee, request, &sa),
+                       profile_tracee_perm(profile, tracer, xrequest, &sa));
 }
 
 
index db7894bb028ccc6ca941c45c4a7e37602ec1228d..faa67861cbc17e1ff7a9e409f76c717179d22370 100644 (file)
@@ -560,7 +560,6 @@ static inline unsigned int muldiv32(unsigned int a, unsigned int b,
 {
        u_int64_t n = (u_int64_t) a * b;
        if (c == 0) {
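+               /* saturate on a zero divisor instead of warning */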
-               snd_BUG_ON(!n);
                *r = 0;
                return UINT_MAX;
        }
index 6e22eea72654e6ce2208500fa7329fc1c2aedf35..d0191340458114761a6c9969605f17585b51b610 100644 (file)
@@ -221,6 +221,7 @@ static struct snd_seq_client *seq_create_client1(int client_index, int poolsize)
        rwlock_init(&client->ports_lock);
        mutex_init(&client->ports_mutex);
        INIT_LIST_HEAD(&client->ports_list_head);
+       mutex_init(&client->ioctl_mutex);
 
        /* find free slot in the client table */
        spin_lock_irqsave(&clients_lock, flags);
@@ -2130,7 +2131,9 @@ static long snd_seq_ioctl(struct file *file, unsigned int cmd,
                        return -EFAULT;
        }
 
+       mutex_lock(&client->ioctl_mutex);
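+       /* the mutex serializes ioctls on this client so that concurrent
+        * calls cannot race inside the handlers */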
        err = handler->func(client, &buf);
+       mutex_unlock(&client->ioctl_mutex);
        if (err >= 0) {
                /* Some commands include a bug in the 'dir' field. */
                if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT ||
index c6614254ef8af2f56b68193dc4ce4d35d49b3773..0611e1e0ed5ba01e053ac3a53c5f09ae681e02a2 100644 (file)
@@ -61,6 +61,7 @@ struct snd_seq_client {
        struct list_head ports_list_head;
        rwlock_t ports_lock;
        struct mutex ports_mutex;
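+       /* serializes ioctl handlers for this client */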
+       struct mutex ioctl_mutex;
        int convert32;          /* convert 32->64bit */
 
        /* output pool */
index 80bbadc83721447754392238118eee98484616b6..d6e079f4ec09d29624a05d3a18413e55910f0cf6 100644 (file)
@@ -408,6 +408,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = {
        /*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/
 
        /* codec SSID */
+       SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122),
        SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81),
        SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122),
        SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101),
index 8fd2d9c62c96ce53a78dced9d0cd3529961978a9..9aafc6c861329d0c2cb4aeb45c11acb722662520 100644 (file)
@@ -6196,6 +6196,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
        SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
        SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
+       SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
index ae0272f9a09184db54933fe079b9cf529bcf47d5..e6acc281dd3757dbd03ba32c69a89b76ed690800 100644 (file)
@@ -46,7 +46,7 @@ $(OBJTOOL_IN): fixdep FORCE
        @$(MAKE) $(build)=objtool
 
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-       @./sync-check.sh
+       @$(CONFIG_SHELL) ./sync-check.sh
        $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
 
 
index 9b341584eb1b56b05761bea127ee6cf94eacc190..f40d46e24bcce3efe385aff13a4cb9274967d0d6 100644 (file)
@@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file)
        }
 }
 
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines.  This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+       struct section *sec;
+       struct rela *rela;
+       struct instruction *insn;
+
+       sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+       if (!sec)
+               return 0;
+
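+       /*
+        * Each rela entry points at an annotated instruction whose
+        * alternatives may insert a retpoline; flag it so those
+        * alternatives are skipped.
+        */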
+       list_for_each_entry(rela, &sec->rela_list, list) {
+               if (rela->sym->type != STT_SECTION) {
+                       WARN("unexpected relocation symbol type in %s", sec->name);
+                       return -1;
+               }
+
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!insn) {
+                       WARN("bad .discard.nospec entry");
+                       return -1;
+               }
+
+               insn->ignore_alts = true;
+       }
+
+       return 0;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
                } else if (rela->sym->sec->idx) {
                        dest_sec = rela->sym->sec;
                        dest_off = rela->sym->sym.st_value + rela->addend + 4;
+               } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+                       /*
+                        * Retpoline jumps are really dynamic jumps in
+                        * disguise, so convert them accordingly.
+                        */
+                       insn->type = INSN_JUMP_DYNAMIC;
+                       continue;
                } else {
                        /* sibling call */
                        insn->jump_dest = 0;
@@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
                        dest_off = insn->offset + insn->len + insn->immediate;
                        insn->call_dest = find_symbol_by_offset(insn->sec,
                                                                dest_off);
+                       /*
+                        * FIXME: Thanks to retpolines, it's now considered
+                        * normal for a function to call within itself.  So
+                        * disable this warning for now.
+                        */
+#if 0
                        if (!insn->call_dest) {
                                WARN_FUNC("can't find call dest symbol at offset 0x%lx",
                                          insn->sec, insn->offset, dest_off);
                                return -1;
                        }
+#endif
                } else if (rela->sym->type == STT_SECTION) {
                        insn->call_dest = find_symbol_by_offset(rela->sym->sec,
                                                                rela->addend+4);
@@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
                return ret;
 
        list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-               alt = malloc(sizeof(*alt));
-               if (!alt) {
-                       WARN("malloc failed");
-                       ret = -1;
-                       goto out;
-               }
 
                orig_insn = find_insn(file, special_alt->orig_sec,
                                      special_alt->orig_off);
@@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
                        goto out;
                }
 
+               /* Ignore retpoline alternatives. */
+               if (orig_insn->ignore_alts)
+                       continue;
+
                new_insn = NULL;
                if (!special_alt->group || special_alt->new_len) {
                        new_insn = find_insn(file, special_alt->new_sec,
@@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
                                goto out;
                }
 
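+               /*
+                * Allocate only after the checks above so that skipping an
+                * ignored alternative does not leak the allocation.
+                */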
+               alt = malloc(sizeof(*alt));
+               if (!alt) {
+                       WARN("malloc failed");
+                       ret = -1;
+                       goto out;
+               }
+
                alt->insn = new_insn;
                list_add_tail(&alt->list, &orig_insn->alts);
 
@@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
 
        add_ignores(file);
 
+       ret = add_nospec_ignores(file);
+       if (ret)
+               return ret;
+
        ret = add_jump_destinations(file);
        if (ret)
                return ret;
index 47d9ea70a83d9f9326fb7bd68431c88337b5ce31..dbadb304a410af0343809752bc6d2869668a9db0 100644 (file)
@@ -44,7 +44,7 @@ struct instruction {
        unsigned int len;
        unsigned char type;
        unsigned long immediate;
-       bool alt_group, visited, dead_end, ignore, hint, save, restore;
+       bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct list_head alts;
index 939a337128dbf3fb08277995de98437b10b037b8..5d4f10ac2af226b58c7ece2f965749b72899ddc6 100644 (file)
@@ -7,7 +7,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
                        check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
-                       protection_keys test_vdso
+                       protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644 (file)
index 0000000..7a744fa
--- /dev/null
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
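+/*
+ * The vsyscall page only exists on x86_64; on 32-bit builds the fixed
+ * addresses below collapse to NULL and those probes are skipped.
+ */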
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_sigaction = handler;
+       sa.sa_flags = SA_SIGINFO | flags;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
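+/*
+ * The legacy vsyscall entry points live at fixed addresses:
+ * gettimeofday at 0xffffffffff600000, time at +0x400, getcpu at +0x800.
+ */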
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+       void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso) {
+               printf("[WARN]\tfailed to find vDSO\n");
+               return;
+       }
+
+       vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+       if (!vdso_gtod)
+               printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+       vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+       if (!vdso_gettime)
+               printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+       vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+       if (!vdso_time)
+               printf("[WARN]\tfailed to find time in vDSO\n");
+
+       vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+       if (!vdso_getcpu) {
+               /* getcpu() was never wired up in the 32-bit vDSO. */
+               printf("[%s]\tfailed to find getcpu in vDSO\n",
+                      sizeof(long) == 8 ? "WARN" : "NOTE");
+       }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+       int nerrs = 0;
+       FILE *maps;
+       char line[128];
+       bool found = false;
+
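+       /*
+        * Parse /proc/self/maps to see how the kernel mapped the vsyscall
+        * page; the r and x bits determine which probes should succeed.
+        */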
+       maps = fopen("/proc/self/maps", "r");
+       if (!maps) {
+               printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+               should_read_vsyscall = true;
+               return 0;
+       }
+
+       while (fgets(line, sizeof(line), maps)) {
+               char r, x;
+               void *start, *end;
+               char name[128];
+               if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+                          &start, &end, &r, &x, name) != 5)
+                       continue;
+
+               if (strcmp(name, "[vsyscall]"))
+                       continue;
+
+               printf("\tvsyscall map: %s", line);
+
+               if (start != (void *)0xffffffffff600000 ||
+                   end != (void *)0xffffffffff601000) {
+                       printf("[FAIL]\taddress range is nonsense\n");
+                       nerrs++;
+               }
+
+               printf("\tvsyscall permissions are %c-%c\n", r, x);
+               should_read_vsyscall = (r == 'r');
+               if (x != 'x') {
+                       vgtod = NULL;
+                       vtime = NULL;
+                       vgetcpu = NULL;
+               }
+
+               found = true;
+               break;
+       }
+
+       fclose(maps);
+
+       if (!found) {
+               printf("\tno vsyscall map in /proc/self/maps\n");
+               should_read_vsyscall = false;
+               vgtod = NULL;
+               vtime = NULL;
+               vgetcpu = NULL;
+       }
+
+       return nerrs;
+#else
+       return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+       return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+       return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+       return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned *cpu, unsigned *node,
+                             void *cache)
+{
+       return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+       siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+       return (double)(a->tv_sec - b->tv_sec) +
+               (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+                     const struct timeval *tv_sys2,
+                     const struct timezone *tz_sys,
+                     const char *which,
+                     const struct timeval *tv_other,
+                     const struct timezone *tz_other)
+{
+       int nerrs = 0;
+       double d1, d2;
+
+       if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+               printf("[FAIL] %s tz mismatch\n", which);
+               nerrs++;
+       }
+
+       d1 = tv_diff(tv_other, tv_sys1);
+       d2 = tv_diff(tv_sys2, tv_other);
+       printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+       if (d1 < 0 || d2 < 0) {
+               printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+               nerrs++;
+       } else {
+               printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+       }
+
+       return nerrs;
+}
+
+static int test_gtod(void)
+{
+       struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+       struct timezone tz_sys, tz_vdso, tz_vsys;
+       long ret_vdso = -1;
+       long ret_vsys = -1;
+       int nerrs = 0;
+
+       printf("[RUN]\ttest gettimeofday()\n");
+
+       if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+               err(1, "syscall gettimeofday");
+       if (vdso_gtod)
+               ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+       if (vgtod)
+               ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+       if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+               err(1, "syscall gettimeofday");
+
+       if (vdso_gtod) {
+               if (ret_vdso == 0) {
+                       nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+               } else {
+                       printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+                       nerrs++;
+               }
+       }
+
+       if (vgtod) {
+               if (ret_vsys == 0) {
+                       nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+               } else {
+                       printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+                       nerrs++;
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_time(void)
+{
+       int nerrs = 0;
+       long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+       long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+
+       printf("[RUN]\ttest time()\n");
+       t_sys1 = sys_time(&t2_sys1);
+       if (vdso_time)
+               t_vdso = vdso_time(&t2_vdso);
+       if (vtime)
+               t_vsys = vtime(&t2_vsys);
+       t_sys2 = sys_time(&t2_sys2);
+       if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+               printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+               nerrs++;
+               return nerrs;
+       }
+
+       if (vdso_time) {
+               if (t_vdso < 0 || t_vdso != t2_vdso) {
+                       printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+                       nerrs++;
+               } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+                       printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+                       nerrs++;
+               } else {
+                       printf("[OK]\tvDSO time() is okay\n");
+               }
+       }
+
+       if (vtime) {
+               if (t_vsys < 0 || t_vsys != t2_vsys) {
+                       printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+                       nerrs++;
+               } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+                       printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+                       nerrs++;
+               } else {
+                       printf("[OK]\tvsyscall time() is okay\n");
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+       int nerrs = 0;
+       long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+       printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
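+       /* pin to the target CPU so every getcpu path should report it */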
+       cpu_set_t cpuset;
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+       if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+               printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+               return nerrs;
+       }
+
+       unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+       unsigned node = 0;
+       bool have_node = false;
+       ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+       if (vdso_getcpu)
+               ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+       if (vgetcpu)
+               ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+       if (ret_sys == 0) {
+               if (cpu_sys != cpu) {
+                       printf("[FAIL]\tsyscall reported CPU %u but should be %d\n", cpu_sys, cpu);
+                       nerrs++;
+               }
+
+               have_node = true;
+               node = node_sys;
+       }
+
+       if (vdso_getcpu) {
+               if (ret_vdso) {
+                       printf("[FAIL]\tvDSO getcpu() failed\n");
+                       nerrs++;
+               } else {
+                       if (!have_node) {
+                               have_node = true;
+                               node = node_vdso;
+                       }
+
+                       if (cpu_vdso != cpu) {
+                               printf("[FAIL]\tvDSO reported CPU %u but should be %d\n", cpu_vdso, cpu);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvDSO reported correct CPU\n");
+                       }
+
+                       if (node_vdso != node) {
+                               printf("[FAIL]\tvDSO reported node %u but should be %u\n", node_vdso, node);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvDSO reported correct node\n");
+                       }
+               }
+       }
+
+       if (vgetcpu) {
+               if (ret_vsys) {
+                       printf("[FAIL]\tvsyscall getcpu() failed\n");
+                       nerrs++;
+               } else {
+                       if (!have_node) {
+                               have_node = true;
+                               node = node_vsys;
+                       }
+
+                       if (cpu_vsys != cpu) {
+                               printf("[FAIL]\tvsyscall reported CPU %u but should be %d\n", cpu_vsys, cpu);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvsyscall reported correct CPU\n");
+                       }
+
+                       if (node_vsys != node) {
+                               printf("[FAIL]\tvsyscall reported node %u but should be %u\n", node_vsys, node);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvsyscall reported correct node\n");
+                       }
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+       printf("[RUN]\tChecking read access to the vsyscall page\n");
+       bool can_read;
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               *(volatile int *)0xffffffffff600000;
+               can_read = true;
+       } else {
+               can_read = false;
+       }
+
+       if (can_read && !should_read_vsyscall) {
+               printf("[FAIL]\tWe have read access, but we shouldn't\n");
+               return 1;
+       } else if (!can_read && should_read_vsyscall) {
+               printf("[FAIL]\tWe don't have read access, but we should\n");
+               return 1;
+       } else {
+               printf("[OK]\tgot expected result\n");
+       }
+#endif
+
+       return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+       unsigned long eflags;
+       asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+       return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+       asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t *)ctx_void;
+       unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
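+       /* count only traps whose RIP lands inside the 4 KiB vsyscall page */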
+       if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+               num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+       time_t tmp;
+       bool is_native;
+
+       if (!vtime)
+               return 0;
+
+       printf("[RUN]\tchecking for native vsyscall\n");
+       sethandler(SIGTRAP, sigtrap, 0);
+       set_eflags(get_eflags() | X86_EFLAGS_TF);
+       vtime(&tmp);
+       set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+       /*
+        * If vsyscalls are emulated, we expect a single trap in the
+        * vsyscall page -- the call instruction will trap with RIP
+        * pointing to the entry point before emulation takes over.
+        * In native mode, we expect two traps, since whatever code
+        * the vsyscall page contains will be more than just a ret
+        * instruction.
+        */
+       is_native = (num_vsyscall_traps > 1);
+
+       printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+              (is_native ? "native" : "emulated"),
+              (int)num_vsyscall_traps);
+
+       return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+       int nerrs = 0;
+
+       init_vdso();
+       nerrs += init_vsys();
+
+       nerrs += test_gtod();
+       nerrs += test_time();
+       nerrs += test_getcpu(0);
+       nerrs += test_getcpu(1);
+
+       sethandler(SIGSEGV, sigsegv, 0);
+       nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+       nerrs += test_native_vsyscall();
+#endif
+
+       return nerrs ? 1 : 0;
+}