Merge tag 'mips_fixes_4.15_2' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 20 Jan 2018 19:37:00 +0000 (11:37 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 20 Jan 2018 19:37:00 +0000 (11:37 -0800)
Pull MIPS fixes from James Hogan:
 "Some final MIPS fixes for 4.15, including important build fixes and a
  MAINTAINERS update:

   - Add myself as MIPS co-maintainer.

   - Fix various all*config build failures (particularly as a result of
     switching the default MIPS platform to the "generic" platform).

   - Fix GCC7 build failures (duplicate const and questionable calls to
     missing __multi3 intrinsic on mips64r6).

   - Fix warnings when CPU Idle is enabled (4.14).

   - Fix AR7 serial output (since 3.17).

   - Fix ralink platform_get_irq error checking (since 3.12)"

* tag 'mips_fixes_4.15_2' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/mips:
  MAINTAINERS: Add James as MIPS co-maintainer
  MIPS: Fix undefined reference to physical_memsize
  MIPS: Implement __multi3 for GCC7 MIPS64r6 builds
  MIPS: mm: Fix duplicate "const" on insn_table_MM
  MIPS: CM: Drop WARN_ON(vp != 0)
  MIPS: ralink: Fix platform_get_irq's error checking
  MIPS: Fix CPS SMP NS16550 UART defaults
  MIPS: BCM47XX Avoid compile error with MIPS allnoconfig
  MIPS: RB532: Avoid undefined mac_pton without GENERIC_NET_UTILS
  MIPS: RB532: Avoid undefined early_serial_setup() without SERIAL_8250_CONSOLE
  MIPS: ath25: Avoid undefined early_serial_setup() without SERIAL_8250_CONSOLE
  MIPS: AR7: ensure the port type's FCR value is used

352 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/kernel-parameters.txt
Documentation/filesystems/nilfs2.txt
Documentation/kbuild/kconfig-language.txt
Documentation/networking/index.rst
Documentation/networking/msg_zerocopy.rst
Documentation/usb/gadget-testing.txt
Documentation/x86/pti.txt [new file with mode: 0644]
MAINTAINERS
Makefile
arch/arm/boot/dts/da850-lcdk.dts
arch/arm/boot/dts/kirkwood-openblocks_a7.dts
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/configs/sunxi_defconfig
arch/arm/net/bpf_jit_32.c
arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
arch/arm64/net/bpf_jit_comp.c
arch/ia64/include/asm/atomic.h
arch/ia64/kernel/time.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/exception-64e.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/plpar_wrappers.h
arch/powerpc/include/asm/setup.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_rmhandlers.S
arch/powerpc/kvm/book3s_segment.S
arch/powerpc/lib/feature-fixups.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/xmon/xmon.c
arch/riscv/configs/defconfig
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/io.h
arch/riscv/include/asm/irqflags.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/ptrace.h
arch/riscv/include/asm/tlbflush.h
arch/riscv/include/asm/uaccess.h
arch/riscv/include/asm/unistd.h
arch/riscv/include/asm/vdso-syscalls.h [deleted file]
arch/riscv/include/uapi/asm/syscalls.h [new file with mode: 0644]
arch/riscv/kernel/entry.S
arch/riscv/kernel/process.c
arch/riscv/kernel/syscall_table.c
arch/riscv/kernel/vdso/flush_icache.S
arch/riscv/mm/fault.c
arch/sh/boards/mach-se/770x/setup.c
arch/sh/include/mach-se/mach/se.h
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/camellia-aesni-avx-asm_64.S
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/events/intel/bts.c
arch/x86/events/intel/rapl.c
arch/x86/include/asm/apic.h
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h [new file with mode: 0644]
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/intel_rdt.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/ftrace_32.S
arch/x86/kernel/ftrace_64.S
arch/x86/kernel/head64.c
arch/x86/kernel/idt.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irqinit.c
arch/x86/kernel/setup.c
arch/x86/kernel/tboot.c
arch/x86/kernel/tsc.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/lib/Makefile
arch/x86/lib/checksum_32.S
arch/x86/lib/retpoline.S [new file with mode: 0644]
arch/x86/mm/fault.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/mem_encrypt_boot.S
arch/x86/mm/pti.c
arch/x86/pci/common.c
arch/x86/pci/fixup.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/intel-mid/device_libs/platform_bt.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/xen-ops.h
crypto/algapi.c
drivers/ata/libata-core.c
drivers/base/Kconfig
drivers/base/cpu.c
drivers/bcma/Kconfig
drivers/block/rbd.c
drivers/gpio/gpio-mmio.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_drv.h
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/bar/gk20a.c
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/mcp77.c [new file with mode: 0644]
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c [new file with mode: 0644]
drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c
drivers/gpu/drm/tegra/sor.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
drivers/i2c/i2c-core-base.c
drivers/i2c/i2c-core-smbus.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/input/misc/twl4030-vibra.c
drivers/input/misc/twl6040-vibra.c
drivers/input/mouse/alps.c
drivers/input/mouse/alps.h
drivers/input/mouse/synaptics.c
drivers/input/rmi4/rmi_driver.c
drivers/input/touchscreen/88pm860x-ts.c
drivers/input/touchscreen/of_touchscreen.c
drivers/md/dm-crypt.c
drivers/md/dm-integrity.c
drivers/md/dm-thin-metadata.c
drivers/md/persistent-data/dm-btree.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/s3cmci.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mux/core.c
drivers/net/can/usb/peak_usb/pcan_usb_fd.c
drivers/net/ethernet/cirrus/cs89x0.c
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/fs_enet/fs_enet.h
drivers/net/ethernet/freescale/gianfar_ptp.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/fm10k/fm10k_pci.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/uar.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ppp/ppp_generic.c
drivers/net/tun.c
drivers/net/usb/lan78xx.c
drivers/net/usb/r8152.c
drivers/net/wireless/ath/wcn36xx/main.c
drivers/net/wireless/ath/wcn36xx/pmc.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/pci.c
drivers/of/of_mdio.c
drivers/phy/phy-core.c
drivers/scsi/libsas/sas_scsi_host.c
drivers/ssb/Kconfig
drivers/staging/android/ashmem.c
drivers/usb/gadget/udc/core.c
drivers/usb/misc/usb3503.c
drivers/usb/mon/mon_bin.c
drivers/usb/serial/cp210x.c
drivers/usb/storage/unusual_uas.h
drivers/usb/usbip/usbip_common.c
drivers/usb/usbip/vudc_rx.c
drivers/usb/usbip/vudc_tx.c
drivers/xen/gntdev.c
fs/proc/array.c
include/linux/bpf.h
include/linux/compiler-gcc.h
include/linux/completion.h
include/linux/cpu.h
include/linux/crash_core.h
include/linux/delayacct.h
include/linux/irqflags.h
include/linux/lockdep.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/netlink.h
include/linux/ptr_ring.h
include/linux/vermagic.h
include/net/arp.h
include/net/cfg80211.h
include/net/sch_generic.h
include/net/tls.h
include/uapi/linux/openvswitch.h
init/Kconfig
kernel/bpf/arraymap.c
kernel/bpf/core.c
kernel/bpf/sockmap.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/crash_core.c
kernel/delayacct.c
kernel/futex.c
kernel/locking/rtmutex.c
kernel/locking/rtmutex_common.h
kernel/sched/completion.c
kernel/sched/core.c
kernel/sched/membarrier.c
kernel/time/timer.c
kernel/trace/Kconfig
kernel/trace/ring_buffer.c
kernel/trace/trace_events.c
kernel/workqueue.c
lib/test_bpf.c
mm/kmemleak.c
mm/memory.c
mm/page_owner.c
net/8021q/vlan.c
net/9p/trans_xen.c
net/bluetooth/l2cap_core.c
net/caif/caif_dev.c
net/caif/caif_usb.c
net/caif/cfcnfg.c
net/caif/cfctrl.c
net/can/af_can.c
net/core/filter.c
net/core/flow_dissector.c
net/core/neighbour.c
net/core/sock_diag.c
net/core/sysctl_net_core.c
net/ipv4/arp.c
net/ipv4/esp4.c
net/ipv4/esp4_offload.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv6/esp6.c
net/ipv6/esp6_offload.c
net/ipv6/exthdrs.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/key/af_key.c
net/netlink/af_netlink.c
net/openvswitch/flow_netlink.c
net/sched/cls_bpf.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sched/sch_ingress.c
net/sctp/ipv6.c
net/sctp/outqueue.c
net/sctp/socket.c
net/socket.c
net/tipc/node.c
net/tls/tls_main.c
net/tls/tls_sw.c
net/wireless/core.c
net/wireless/core.h
net/wireless/nl80211.c
net/wireless/reg.c
net/wireless/wext-compat.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
scripts/Makefile.build
scripts/decodecode
scripts/gdb/linux/tasks.py
scripts/genksyms/.gitignore
scripts/kconfig/expr.c
security/Kconfig
security/apparmor/domain.c
security/apparmor/include/perms.h
security/apparmor/ipc.c
sound/core/oss/pcm_oss.c
sound/core/oss/pcm_plugin.c
sound/core/pcm_lib.c
sound/core/pcm_native.c
sound/core/seq/seq_clientmgr.c
sound/core/seq/seq_clientmgr.h
sound/drivers/aloop.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_realtek.c
tools/objtool/Makefile
tools/objtool/check.c
tools/objtool/check.h
tools/objtool/elf.c
tools/testing/selftests/bpf/test_align.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/test_vsyscall.c [new file with mode: 0644]

index d6d862db3b5d65fe09c26783298bba21ceec5558..bfd29bc8d37af1bbc4913deee9d941b1692bedda 100644 (file)
@@ -375,3 +375,19 @@ Contact:   Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   information about CPUs heterogeneity.
 
                cpu_capacity: capacity of cpu#.
+
+What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/spectre_v1
+               /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:          January 2018
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:   Information about CPU vulnerabilities
+
+               The files are named after the code names of CPU
+               vulnerabilities. The output of those files reflects the
+               state of the CPUs in the system. Possible output values:
+
+               "Not affected"    CPU is not affected by the vulnerability
+               "Vulnerable"      CPU is affected and no mitigation in effect
+               "Mitigation: $M"  CPU is affected and mitigation $M is in effect
index af7104aaffd92f1eb3db02df44ffc33e7df78adc..46b26bfee27ba81267703a1ceac7c0e082f1e5ec 100644 (file)
                        It will be ignored when crashkernel=X,high is not used
                        or memory reserved is below 4G.
 
-       crossrelease_fullstack
-                       [KNL] Allow to record full stack trace in cross-release
-
        cryptomgr.notests
                         [KNL] Disable crypto self-tests
 
        nosmt           [KNL,S390] Disable symmetric multithreading (SMT).
                        Equivalent to smt=1.
 
+       nospectre_v2    [X86] Disable all mitigations for the Spectre variant 2
+                       (indirect branch prediction) vulnerability. System may
+                       allow data leaks with this option, which is equivalent
+                       to spectre_v2=off.
+
        noxsave         [BUGS=X86] Disables x86 extended register state save
                        and restore using xsave. The kernel will fallback to
                        enabling legacy floating-point and sse state.
                        steal time is computed, but won't influence scheduler
                        behaviour
 
-       nopti           [X86-64] Disable kernel page table isolation
-
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
                pcie_scan_all   Scan all possible PCIe devices.  Otherwise we
                                only look for one device below a PCIe downstream
                                port.
+               big_root_window Try to add a big 64bit memory window to the PCIe
+                               root complex on AMD CPUs. Some GFX hardware
+                               can resize a BAR to allow access to all VRAM.
+                               Adding the window is slightly risky (it may
+                               conflict with unreported devices), so this
+                               taints the kernel.
 
        pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
                        Management.
        pt.             [PARIDE]
                        See Documentation/blockdev/paride.txt.
 
-       pti=            [X86_64]
-                       Control user/kernel address space isolation:
-                       on - enable
-                       off - disable
-                       auto - default setting
+       pti=            [X86_64] Control Page Table Isolation of user and
+                       kernel address spaces.  Disabling this feature
+                       removes hardening, but improves performance of
+                       system calls and interrupts.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable to issues that PTI mitigates
+
+                       Not specifying this option is equivalent to pti=auto.
+
+       nopti           [X86_64]
+                       Equivalent to pti=off
 
        pty.legacy_count=
                        [KNL] Number of legacy pty's. Overwrites compiled-in
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/laptops/sonypi.txt
 
+       spectre_v2=     [X86] Control mitigation of Spectre variant 2
+                       (indirect branch speculation) vulnerability.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable
+
+                       Selecting 'on' will, and 'auto' may, choose a
+                       mitigation method at run time according to the
+                       CPU, the available microcode, the setting of the
+                       CONFIG_RETPOLINE configuration option, and the
+                       compiler with which the kernel was built.
+
+                       Specific mitigations can also be selected manually:
+
+                       retpoline         - replace indirect branches
+                       retpoline,generic - google's original retpoline
+                       retpoline,amd     - AMD-specific minimal thunk
+
+                       Not specifying this option is equivalent to
+                       spectre_v2=auto.
+
        spia_io_base=   [HW,MTD]
        spia_fio_base=
        spia_pedr=
index c0727dc36271e99eaf844c205fff6d901c3d057b..f2f3f8592a6f5e12bdcd2d56005deaf52030d0f7 100644 (file)
@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 cleaner or garbage collector) are required.  Details on the tools are
 described in the man pages included in the package.
 
-Project web page:    http://nilfs.sourceforge.net/
-Download page:       http://nilfs.sourceforge.net/en/download.html
+Project web page:    https://nilfs.sourceforge.io/
+Download page:       https://nilfs.sourceforge.io/en/download.html
 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs
 
 Caveats
index 262722d8867b2aa9f70f137529dd298e5e36af60..c4a293a03c337f5080d9cf682f64428fc74a1d79 100644 (file)
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                             (1)
            <symbol> '=' <symbol>                (2)
            <symbol> '!=' <symbol>               (3)
-           '(' <expr> ')'                       (4)
-           '!' <expr>                           (5)
-           <expr> '&&' <expr>                   (6)
-           <expr> '||' <expr>                   (7)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
 
 Expressions are listed in decreasing order of precedence. 
 
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If value of <symbol1> is respectively lower, greater, lower-or-equal,
+    or greater-or-equal than value of <symbol2>, it returns 'y',
+    otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its
index 66e62086624501fe76c170f77efc1cbc0844032f..7d4b15977d61201eb8b265293d3d95a6b042c7e6 100644 (file)
@@ -9,6 +9,7 @@ Contents:
    batman-adv
    kapi
    z8530book
+   msg_zerocopy
 
 .. only::  subproject
 
@@ -16,4 +17,3 @@ Contents:
    =======
 
    * :ref:`genindex`
-
index 77f6d7e25cfda65ac36e0c47bb2f8bdacc2fee2a..291a012649678035b5d0fdc306904093ceedf610 100644 (file)
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
        if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
                error(1, errno, "setsockopt zerocopy");
 
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be set
+to the listening socket and it will be inherited by the accepted sockets.
 
 Transmission
 ------------
index 441a4b9b666fbb2b2aace2cbc2f5ab542635d553..5908a21fddb6035cd25bb759bfd01ee405371163 100644 (file)
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an inverval value
 in each line. The rules stated above are best illustrated with an example:
 
 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644 (file)
index 0000000..d11eff6
--- /dev/null
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications.  When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy.  When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT).  There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled.  It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI.  This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level.  This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel.  This data is entirely contained in the 'struct
+cpu_entry_area' structure which is placed in the fixmap which gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal.  The only difference is when the kernel
+makes entries in the top (PGD) level.  In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables.  This leaves a single, shared set of
+userspace page tables to manage.  One PTE to lock, one set of
+accessed bits, dirty bits, etc...
+
+Overhead
+========
+
+Protection against side-channel attacks is important.  But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry.  This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.)  Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry.  This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, so requires mapping fewer
+     things into the userspace page tables.  The downside is
+     that stacks must be switched at entry time.
+  d. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables.  This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel.  Losing the feature means more
+     TLB misses after a context switch.  The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed.  This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper.  But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB.  The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process.  Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process.  But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures.  At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace.  This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB.  That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs.  Some systems support
+     PCIDs, but do not support INVPCID.  On these systems, addresses
+     can only be flushed from the TLB for the current PCID.  When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+   unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes.  These tests frequently uncover corner cases in the
+   kernel entry code.  In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts).  This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+       while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code.  Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup.  Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
+   like screwing up a page table switch.  Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI.  The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts.  Also caused by incorrectly mapping NMI
+   code.  NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace.  entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults.  Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs.  These have
+   tended to be TLB invalidation issues.  Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
index 018d50ed382e6365e9ae7784883458818b0b1ddc..e3581413420c61aa95cb04939576c84f3a2a3782 100644 (file)
@@ -9639,8 +9639,8 @@ F:        include/uapi/linux/sunrpc/
 NILFS2 FILESYSTEM
 M:     Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
 L:     linux-nilfs@vger.kernel.org
-W:     http://nilfs.sourceforge.net/
-W:     http://nilfs.osdn.jp/
+W:     https://nilfs.sourceforge.io/
+W:     https://nilfs.osdn.jp/
 T:     git git://github.com/konis/nilfs2.git
 S:     Supported
 F:     Documentation/filesystems/nilfs2.txt
index eb59638035dd6fe1d0e7aac80b1357aae565c7eb..bf5b8cbb9469db3ab4a42e636746cab82a093044 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC       := --gcc-toolchain=$(GCC_TOOLCHAIN)
 endif
 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
 
 ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not on of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
index eed89e659143a878c9a62159c52c834340c3a61b..a1f4d6d5a569bbb740b6a5d27f7232155853ec4a 100644 (file)
                                        label = "u-boot env";
                                        reg = <0 0x020000>;
                                };
-                               partition@0x020000 {
+                               partition@20000 {
                                        /* The LCDK defaults to booting from this partition */
                                        label = "u-boot";
                                        reg = <0x020000 0x080000>;
                                };
-                               partition@0x0a0000 {
+                               partition@a0000 {
                                        label = "free space";
                                        reg = <0x0a0000 0>;
                                };
index cf2f5240e176d0b1d60f578db6028f143bfea97c..27cc913ca0f569e840762fc600034247819a1937 100644 (file)
@@ -53,7 +53,8 @@
                };
 
                pinctrl: pin-controller@10000 {
-                       pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>;
+                       pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header
+                                    &pmx_gpio_header_gpo>;
                        pinctrl-names = "default";
 
                        pmx_uart0: pmx-uart0 {
                         * ground.
                         */
                        pmx_gpio_header: pmx-gpio-header {
-                               marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28",
+                               marvell,pins = "mpp17", "mpp29", "mpp28",
                                               "mpp35", "mpp34", "mpp40";
                                marvell,function = "gpio";
                        };
 
+                       pmx_gpio_header_gpo: pxm-gpio-header-gpo {
+                               marvell,pins = "mpp7";
+                               marvell,function = "gpo";
+                       };
+
                        pmx_gpio_init: pmx-init {
                                marvell,pins = "mpp38";
                                marvell,function = "gpio";
index 5840f5c75c3b388d47242c53aa0c0f7fbe3e3a68..4f2f2eea07552db91e67b5fb6fd9e2bd469b7905 100644 (file)
 
                                        be1_out_tcon0: endpoint@0 {
                                                reg = <0>;
-                                               remote-endpoint = <&tcon1_in_be0>;
+                                               remote-endpoint = <&tcon0_in_be1>;
                                        };
 
                                        be1_out_tcon1: endpoint@1 {
index 59655e42e4b09a75edc0b285e2c18fd4926e21aa..bd0cd3204273d3fc4434ce2ac7ac2cf92deb4ef4 100644 (file)
 
                                        be1_out_tcon0: endpoint@0 {
                                                reg = <0>;
-                                               remote-endpoint = <&tcon1_in_be0>;
+                                               remote-endpoint = <&tcon0_in_be1>;
                                        };
 
                                        be1_out_tcon1: endpoint@1 {
index 5caaf971fb500c8265bfd75350578cf3071ca90b..df433abfcb0284ec5d9df989f1f30957ba86f6fe 100644 (file)
@@ -10,6 +10,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=8
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
+CONFIG_CMA=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_CPU_FREQ=y
@@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_AHCI_SUNXI=y
index c199990e12b620fe40f34215abf2d577e6f90c70..323a4df59a6c0ec6657b42b714b161a6d3729cca 100644 (file)
 
 int bpf_jit_enable __read_mostly;
 
+/*
+ * eBPF prog stack layout:
+ *
+ *                         high
+ * original ARM_SP =>     +-----+
+ *                        |     | callee saved registers
+ *                        +-----+ <= (BPF_FP + SCRATCH_SIZE)
+ *                        | ... | eBPF JIT scratch space
+ * eBPF fp register =>    +-----+
+ *   (BPF_FP)             | ... | eBPF prog stack
+ *                        +-----+
+ *                        |RSVD | JIT scratchpad
+ * current ARM_SP =>      +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE)
+ *                        |     |
+ *                        | ... | Function call stack
+ *                        |     |
+ *                        +-----+
+ *                          low
+ *
+ * The callee saved registers depends on whether frame pointers are enabled.
+ * With frame pointers (to be compliant with the ABI):
+ *
+ *                                high
+ * original ARM_SP =>     +------------------+ \
+ *                        |        pc        | |
+ * current ARM_FP =>      +------------------+ } callee saved registers
+ *                        |r4-r8,r10,fp,ip,lr| |
+ *                        +------------------+ /
+ *                                low
+ *
+ * Without frame pointers:
+ *
+ *                                high
+ * original ARM_SP =>     +------------------+
+ *                        | r4-r8,r10,fp,lr  | callee saved registers
+ * current ARM_FP =>      +------------------+
+ *                                low
+ *
+ * When popping registers off the stack at the end of a BPF function, we
+ * reference them via the current ARM_FP register.
+ */
+#define CALLEE_MASK    (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
+                        1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \
+                        1 << ARM_FP)
+#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR)
+#define CALLEE_POP_MASK  (CALLEE_MASK | 1 << ARM_PC)
+
 #define STACK_OFFSET(k)        (k)
 #define TMP_REG_1      (MAX_BPF_JIT_REG + 0)   /* TEMP Register 1 */
 #define TMP_REG_2      (MAX_BPF_JIT_REG + 1)   /* TEMP Register 2 */
 #define TCALL_CNT      (MAX_BPF_JIT_REG + 2)   /* Tail Call Count */
 
-/* Flags used for JIT optimization */
-#define SEEN_CALL      (1 << 0)
-
 #define FLAG_IMM_OVERFLOW      (1 << 0)
 
 /*
@@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = {
  * idx                 :       index of current last JITed instruction.
  * prologue_bytes      :       bytes used in prologue.
  * epilogue_offset     :       offset of epilogue starting.
- * seen                        :       bit mask used for JIT optimization.
  * offsets             :       array of eBPF instruction offsets in
  *                             JITed code.
  * target              :       final JITed code.
@@ -110,7 +153,6 @@ struct jit_ctx {
        unsigned int idx;
        unsigned int prologue_bytes;
        unsigned int epilogue_offset;
-       u32 seen;
        u32 flags;
        u32 *offsets;
        u32 *target;
@@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size)
                *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
 }
 
-/* Stack must be multiples of 16 Bytes */
-#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+/* EABI requires the stack to be aligned to 64-bit boundaries */
+#define STACK_ALIGNMENT        8
+#else
+/* Stack must be aligned to 32-bit boundaries */
+#define STACK_ALIGNMENT        4
+#endif
 
 /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
  * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
@@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size)
         + SCRATCH_SIZE + \
         + 4 /* extra for skb_copy_bits buffer */)
 
-#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
+#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
 
 /* Get the offset of eBPF REGISTERs stored on scratch space. */
 #define STACK_VAR(off) (STACK_SIZE-off-4)
@@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
                emit_mov_i_no8m(rd, val, ctx);
 }
 
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx)
 {
-       ctx->seen |= SEEN_CALL;
-#if __LINUX_ARM_ARCH__ < 5
-       emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
-
        if (elf_hwcap & HWCAP_THUMB)
                emit(ARM_BX(tgt_reg), ctx);
        else
                emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+}
+
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
+{
+#if __LINUX_ARM_ARCH__ < 5
+       emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+       emit_bx_r(tgt_reg, ctx);
 #else
        emit(ARM_BLX_R(tgt_reg), ctx);
 #endif
@@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
        }
 
        /* Call appropriate function */
-       ctx->seen |= SEEN_CALL;
        emit_mov_i(ARM_IP, op == BPF_DIV ?
                   (u32)jit_udiv32 : (u32)jit_mod32, ctx);
        emit_blx_r(ARM_IP, ctx);
@@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
        /* Do LSH operation */
        emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
-       /* As we are using ARM_LR */
-       ctx->seen |= SEEN_CALL;
        emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
        emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
        emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
@@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
        /* Do the ARSH operation */
        emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-       /* As we are using ARM_LR */
-       ctx->seen |= SEEN_CALL;
        emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
        emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
        _emit(ARM_COND_MI, ARM_B(0), ctx);
@@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
        /* Do LSH operation */
        emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
        emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
-       /* As we are using ARM_LR */
-       ctx->seen |= SEEN_CALL;
        emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
        emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
        emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
@@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
        /* Do Multiplication */
        emit(ARM_MUL(ARM_IP, rd, rn), ctx);
        emit(ARM_MUL(ARM_LR, rm, rt), ctx);
-       /* As we are using ARM_LR */
-       ctx->seen |= SEEN_CALL;
        emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx);
 
        emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx);
@@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk,
 }
 
 /* dst = *(size*)(src + off) */
-static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk,
-                             const s32 off, struct jit_ctx *ctx, const u8 sz){
+static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk,
+                             s32 off, struct jit_ctx *ctx, const u8 sz){
        const u8 *tmp = bpf2a32[TMP_REG_1];
-       u8 rd = dstk ? tmp[1] : dst;
+       const u8 *rd = dstk ? tmp : dst;
        u8 rm = src;
+       s32 off_max;
 
-       if (off) {
+       if (sz == BPF_H)
+               off_max = 0xff;
+       else
+               off_max = 0xfff;
+
+       if (off < 0 || off > off_max) {
                emit_a32_mov_i(tmp[0], off, false, ctx);
                emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
                rm = tmp[0];
+               off = 0;
+       } else if (rd[1] == rm) {
+               emit(ARM_MOV_R(tmp[0], rm), ctx);
+               rm = tmp[0];
        }
        switch (sz) {
-       case BPF_W:
-               /* Load a Word */
-               emit(ARM_LDR_I(rd, rm, 0), ctx);
+       case BPF_B:
+               /* Load a Byte */
+               emit(ARM_LDRB_I(rd[1], rm, off), ctx);
+               emit_a32_mov_i(dst[0], 0, dstk, ctx);
                break;
        case BPF_H:
                /* Load a HalfWord */
-               emit(ARM_LDRH_I(rd, rm, 0), ctx);
+               emit(ARM_LDRH_I(rd[1], rm, off), ctx);
+               emit_a32_mov_i(dst[0], 0, dstk, ctx);
                break;
-       case BPF_B:
-               /* Load a Byte */
-               emit(ARM_LDRB_I(rd, rm, 0), ctx);
+       case BPF_W:
+               /* Load a Word */
+               emit(ARM_LDR_I(rd[1], rm, off), ctx);
+               emit_a32_mov_i(dst[0], 0, dstk, ctx);
+               break;
+       case BPF_DW:
+               /* Load a Double Word */
+               emit(ARM_LDR_I(rd[1], rm, off), ctx);
+               emit(ARM_LDR_I(rd[0], rm, off + 4), ctx);
                break;
        }
        if (dstk)
-               emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+               emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx);
+       if (dstk && sz == BPF_DW)
+               emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx);
 }
 
 /* Arithmatic Operation */
@@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm,
                             const u8 rn, struct jit_ctx *ctx, u8 op) {
        switch (op) {
        case BPF_JSET:
-               ctx->seen |= SEEN_CALL;
                emit(ARM_AND_R(ARM_IP, rt, rn), ctx);
                emit(ARM_AND_R(ARM_LR, rd, rm), ctx);
                emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx);
@@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
        const u8 *tcc = bpf2a32[TCALL_CNT];
        const int idx0 = ctx->idx;
 #define cur_offset (ctx->idx - idx0)
-#define jmp_offset (out_offset - (cur_offset))
+#define jmp_offset (out_offset - (cur_offset) - 2)
        u32 off, lo, hi;
 
        /* if (index >= array->map.max_entries)
@@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
        emit_a32_mov_i(tmp[1], off, false, ctx);
        emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx);
        emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx);
-       /* index (64 bit) */
+       /* index is 32-bit for arrays */
        emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx);
        /* index >= array->map.max_entries */
        emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx);
@@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
        emit_a32_mov_i(tmp2[1], off, false, ctx);
        emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx);
        emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx);
-       emit(ARM_BX(tmp[1]), ctx);
+       emit_bx_r(tmp[1], ctx);
 
        /* out: */
        if (out_offset == -1)
@@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx)
        const u8 r2 = bpf2a32[BPF_REG_1][1];
        const u8 r3 = bpf2a32[BPF_REG_1][0];
        const u8 r4 = bpf2a32[BPF_REG_6][1];
-       const u8 r5 = bpf2a32[BPF_REG_6][0];
-       const u8 r6 = bpf2a32[TMP_REG_1][1];
-       const u8 r7 = bpf2a32[TMP_REG_1][0];
-       const u8 r8 = bpf2a32[TMP_REG_2][1];
-       const u8 r10 = bpf2a32[TMP_REG_2][0];
        const u8 fplo = bpf2a32[BPF_REG_FP][1];
        const u8 fphi = bpf2a32[BPF_REG_FP][0];
-       const u8 sp = ARM_SP;
        const u8 *tcc = bpf2a32[TCALL_CNT];
 
-       u16 reg_set = 0;
-
-       /*
-        * eBPF prog stack layout
-        *
-        *                         high
-        * original ARM_SP =>     +-----+ eBPF prologue
-        *                        |FP/LR|
-        * current ARM_FP =>      +-----+
-        *                        | ... | callee saved registers
-        * eBPF fp register =>    +-----+ <= (BPF_FP)
-        *                        | ... | eBPF JIT scratch space
-        *                        |     | eBPF prog stack
-        *                        +-----+
-        *                        |RSVD | JIT scratchpad
-        * current A64_SP =>      +-----+ <= (BPF_FP - STACK_SIZE)
-        *                        |     |
-        *                        | ... | Function call stack
-        *                        |     |
-        *                        +-----+
-        *                          low
-        */
-
        /* Save callee saved registers. */
-       reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
 #ifdef CONFIG_FRAME_POINTER
-       reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC);
-       emit(ARM_MOV_R(ARM_IP, sp), ctx);
+       u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC;
+       emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
        emit(ARM_PUSH(reg_set), ctx);
        emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
 #else
-       /* Check if call instruction exists in BPF body */
-       if (ctx->seen & SEEN_CALL)
-               reg_set |= (1<<ARM_LR);
-       emit(ARM_PUSH(reg_set), ctx);
+       emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
+       emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
        /* Save frame pointer for later */
-       emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx);
+       emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
 
        ctx->stack_size = imm8m(STACK_SIZE);
 
@@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx)
        /* end of prologue */
 }
 
+/* restore callee saved registers. */
 static void build_epilogue(struct jit_ctx *ctx)
 {
-       const u8 r4 = bpf2a32[BPF_REG_6][1];
-       const u8 r5 = bpf2a32[BPF_REG_6][0];
-       const u8 r6 = bpf2a32[TMP_REG_1][1];
-       const u8 r7 = bpf2a32[TMP_REG_1][0];
-       const u8 r8 = bpf2a32[TMP_REG_2][1];
-       const u8 r10 = bpf2a32[TMP_REG_2][0];
-       u16 reg_set = 0;
-
-       /* unwind function call stack */
-       emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
-
-       /* restore callee saved registers. */
-       reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10);
 #ifdef CONFIG_FRAME_POINTER
-       /* the first instruction of the prologue was: mov ip, sp */
-       reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC);
+       /* When using frame pointers, some additional registers need to
+        * be loaded. */
+       u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP;
+       emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx);
        emit(ARM_LDM(ARM_SP, reg_set), ctx);
 #else
-       if (ctx->seen & SEEN_CALL)
-               reg_set |= (1<<ARM_PC);
        /* Restore callee saved registers. */
-       emit(ARM_POP(reg_set), ctx);
-       /* Return back to the callee function */
-       if (!(ctx->seen & SEEN_CALL))
-               emit(ARM_BX(ARM_LR), ctx);
+       emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx);
+       emit(ARM_POP(CALLEE_POP_MASK), ctx);
 #endif
 }
 
@@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                        emit_rev32(rt, rt, ctx);
                        goto emit_bswap_uxt;
                case 64:
-                       /* Because of the usage of ARM_LR */
-                       ctx->seen |= SEEN_CALL;
                        emit_rev32(ARM_LR, rt, ctx);
                        emit_rev32(rt, rd, ctx);
                        emit(ARM_MOV_R(rd, ARM_LR), ctx);
@@ -1448,22 +1460,7 @@ exit:
                rn = sstk ? tmp2[1] : src_lo;
                if (sstk)
                        emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx);
-               switch (BPF_SIZE(code)) {
-               case BPF_W:
-                       /* Load a Word */
-               case BPF_H:
-                       /* Load a Half-Word */
-               case BPF_B:
-                       /* Load a Byte */
-                       emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code));
-                       emit_a32_mov_i(dst_hi, 0, dstk, ctx);
-                       break;
-               case BPF_DW:
-                       /* Load a double word */
-                       emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W);
-                       emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W);
-                       break;
-               }
+               emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code));
                break;
        /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */
        case BPF_LD | BPF_ABS | BPF_W:
index 7c9bdc7ab50bcfd12764d7206bb0fb75e30ec8c1..9db19314c60cda7de981992f55f2e623b92fcd00 100644 (file)
@@ -66,6 +66,7 @@
                                     <&cpu1>,
                                     <&cpu2>,
                                     <&cpu3>;
+               interrupt-parent = <&intc>;
        };
 
        psci {
index e3b64d03fbd82844aaf65fc862e62d15af042133..9c7724e82aff20b14cc19da42c76fef92aa90723 100644 (file)
                        cpm_ethernet: ethernet@0 {
                                compatible = "marvell,armada-7k-pp22";
                                reg = <0x0 0x100000>, <0x129000 0xb000>;
-                               clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>;
-                               clock-names = "pp_clk", "gop_clk", "mg_clk";
+                               clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>,
+                                        <&cpm_clk 1 5>, <&cpm_clk 1 18>;
+                               clock-names = "pp_clk", "gop_clk",
+                                             "mg_clk","axi_clk";
                                marvell,system-controller = <&cpm_syscon0>;
                                status = "disabled";
                                dma-coherent;
                                #size-cells = <0>;
                                compatible = "marvell,orion-mdio";
                                reg = <0x12a200 0x10>;
-                               clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>;
+                               clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>,
+                                        <&cpm_clk 1 6>, <&cpm_clk 1 18>;
                                status = "disabled";
                        };
 
                                compatible = "marvell,armada-cp110-sdhci";
                                reg = <0x780000 0x300>;
                                interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>;
-                               clock-names = "core";
-                               clocks = <&cpm_clk 1 4>;
+                               clock-names = "core","axi";
+                               clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>;
                                dma-coherent;
                                status = "disabled";
                        };
index 0d51096c69f802e68cbf1e6392e26e0a553138ec..87ac68b2cf37fab1dc1c68b5a7112af7524da083 100644 (file)
                        cps_ethernet: ethernet@0 {
                                compatible = "marvell,armada-7k-pp22";
                                reg = <0x0 0x100000>, <0x129000 0xb000>;
-                               clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>;
-                               clock-names = "pp_clk", "gop_clk", "mg_clk";
+                               clocks = <&cps_clk 1 3>, <&cps_clk 1 9>,
+                                        <&cps_clk 1 5>, <&cps_clk 1 18>;
+                               clock-names = "pp_clk", "gop_clk",
+                                             "mg_clk", "axi_clk";
                                marvell,system-controller = <&cps_syscon0>;
                                status = "disabled";
                                dma-coherent;
                                #size-cells = <0>;
                                compatible = "marvell,orion-mdio";
                                reg = <0x12a200 0x10>;
-                               clocks = <&cps_clk 1 9>, <&cps_clk 1 5>;
+                               clocks = <&cps_clk 1 9>, <&cps_clk 1 5>,
+                                        <&cps_clk 1 6>, <&cps_clk 1 18>;
                                status = "disabled";
                        };
 
index ba38d403abb2fc92d8ea6ae9a6c3c38e70979062..bb32f7f6dd0f967fd1435ad539d922b69650fb2b 100644 (file)
@@ -148,7 +148,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
 /* Stack must be multiples of 16B */
 #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
 
-#define PROLOGUE_OFFSET 8
+/* Tail call offset to jump into */
+#define PROLOGUE_OFFSET 7
 
 static int build_prologue(struct jit_ctx *ctx)
 {
@@ -200,19 +201,19 @@ static int build_prologue(struct jit_ctx *ctx)
        /* Initialize tail_call_cnt */
        emit(A64_MOVZ(1, tcc, 0, 0), ctx);
 
-       /* 4 byte extra for skb_copy_bits buffer */
-       ctx->stack_size = prog->aux->stack_depth + 4;
-       ctx->stack_size = STACK_ALIGN(ctx->stack_size);
-
-       /* Set up function call stack */
-       emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
-
        cur_offset = ctx->idx - idx0;
        if (cur_offset != PROLOGUE_OFFSET) {
                pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
                            cur_offset, PROLOGUE_OFFSET);
                return -1;
        }
+
+       /* 4 byte extra for skb_copy_bits buffer */
+       ctx->stack_size = prog->aux->stack_depth + 4;
+       ctx->stack_size = STACK_ALIGN(ctx->stack_size);
+
+       /* Set up function call stack */
+       emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
        return 0;
 }
 
@@ -260,11 +261,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx)
        emit(A64_LDR64(prg, tmp, prg), ctx);
        emit(A64_CBZ(1, prg, jmp_offset), ctx);
 
-       /* goto *(prog->bpf_func + prologue_size); */
+       /* goto *(prog->bpf_func + prologue_offset); */
        off = offsetof(struct bpf_prog, bpf_func);
        emit_a64_mov_i64(tmp, off, ctx);
        emit(A64_LDR64(tmp, prg, tmp), ctx);
        emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
+       emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
        emit(A64_BR(tmp), ctx);
 
        /* out: */
index 28e02c99be6d72efca8128d8a11011e20bd89936..762eeb0fcc1dcaec27a4dff140274113a2c31fbf 100644 (file)
@@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)                         \
 ATOMIC_OPS(add, +)
 ATOMIC_OPS(sub, -)
 
-#define atomic_add_return(i,v)                                         \
+#ifdef __OPTIMIZE__
+#define __ia64_atomic_const(i) __builtin_constant_p(i) ?               \
+               ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 ||       \
+                (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0
+
+#define atomic_add_return(i, v)                                                \
 ({                                                                     \
-       int __ia64_aar_i = (i);                                         \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_aar_i ==  1) || (__ia64_aar_i ==   4)           \
-            || (__ia64_aar_i ==  8) || (__ia64_aar_i ==  16)           \
-            || (__ia64_aar_i == -1) || (__ia64_aar_i ==  -4)           \
-            || (__ia64_aar_i == -8) || (__ia64_aar_i == -16)))         \
-               ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter)       \
-               : ia64_atomic_add(__ia64_aar_i, v);                     \
+       int __i = (i);                                                  \
+       static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
+       __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) :      \
+                               ia64_atomic_add(__i, v);                \
 })
 
-#define atomic_sub_return(i,v)                                         \
+#define atomic_sub_return(i, v)                                                \
 ({                                                                     \
-       int __ia64_asr_i = (i);                                         \
-       (__builtin_constant_p(i)                                        \
-        && (   (__ia64_asr_i ==   1) || (__ia64_asr_i ==   4)          \
-            || (__ia64_asr_i ==   8) || (__ia64_asr_i ==  16)          \
-            || (__ia64_asr_i ==  -1) || (__ia64_asr_i ==  -4)          \
-            || (__ia64_asr_i ==  -8) || (__ia64_asr_i == -16)))        \
-               ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter)      \
-               : ia64_atomic_sub(__ia64_asr_i, v);                     \
+       int __i = (i);                                                  \
+       static const int __ia64_atomic_p = __ia64_atomic_const(i);      \
+       __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) :     \
+                               ia64_atomic_sub(__i, v);                \
 })
+#else
+#define atomic_add_return(i, v)        ia64_atomic_add(i, v)
+#define atomic_sub_return(i, v)        ia64_atomic_sub(i, v)
+#endif
 
 #define atomic_fetch_add(i,v)                                          \
 ({                                                                     \
index c6ecb97151a25774bf75ce5a1663210db4f6bce3..9025699049ca63b3834ed917d935224009d2d7a8 100644 (file)
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
        }
 
        if (ti->softirq_time) {
-               delta = cycle_to_nsec(ti->softirq_time));
+               delta = cycle_to_nsec(ti->softirq_time);
                account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
        }
 
index c51e6ce42e7a258d20b76f8331c6a8156ffda4b6..2ed525a44734cfaa62ba7125fab11225c1778a7a 100644 (file)
@@ -166,6 +166,7 @@ config PPC
        select GENERIC_CLOCKEVENTS_BROADCAST    if SMP
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_CPU_VULNERABILITIES      if PPC_BOOK3S_64
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_SMP_IDLE_THREAD
index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644 (file)
@@ -209,5 +209,11 @@ exc_##label##_book3e:
        ori     r3,r3,vector_offset@l;          \
        mtspr   SPRN_IVOR##vector_number,r3;
 
+#define RFI_TO_KERNEL                                                  \
+       rfi
+
+#define RFI_TO_USER                                                    \
+       rfi
+
 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
 
index b27205297e1d9ca5ca46db9e7189bf9187385803..7197b179c1b150ba819f13a1f7feb6c9a45da1f9 100644 (file)
  */
 #define EX_R3          EX_DAR
 
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT                                                 \
+       RFI_FLUSH_FIXUP_SECTION;                                        \
+       nop;                                                            \
+       nop;                                                            \
+       nop
+
+#define RFI_TO_KERNEL                                                  \
+       rfid
+
+#define RFI_TO_USER                                                    \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL                                          \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_GUEST                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define HRFI_TO_KERNEL                                                 \
+       hrfid
+
+#define HRFI_TO_USER                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL                                         \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_GUEST                                                  \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN                                                        \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)                   \
        mfspr   r11,SPRN_##h##SRR0;     /* save SRR0 */                 \
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)                           \
        __EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 
 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)                      \
index 8f88f771cc55ce982c11599b273e584babd38600..1e82eb3caabd19c69289957da188b563d0bcd0d6 100644 (file)
@@ -187,7 +187,20 @@ label##3:                                          \
        FTR_ENTRY_OFFSET label##1b-label##3b;           \
        .popsection;
 
+#define RFI_FLUSH_FIXUP_SECTION                                \
+951:                                                   \
+       .pushsection __rfi_flush_fixup,"a";             \
+       .align 2;                                       \
+952:                                                   \
+       FTR_ENTRY_OFFSET 951b-952b;                     \
+       .popsection;
+
+
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
index a409177be8bdfd5f717af6111700d088fb91e61b..eca3f9c689070632492b18e9cc9d55d075024dfe 100644 (file)
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING         0x1D8
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS          -2
 /* >= 0 values are CPU number */
 
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31      (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED   (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30     (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2     (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV     (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY    (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR       (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR  (1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK     0x18
 #define PROC_TABLE_DEREG       0x10
 #define PROC_TABLE_GTSE                0x01
 
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
 
 /**
  * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
@@ -436,6 +449,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
        }
 }
 
+struct h_cpu_char_result {
+       u64 character;
+       u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
index 3892db93b8374e1f1256a6b497d384f3faedc06e..23ac7fc0af23b6cae8f4706665102dd7e2050667 100644 (file)
@@ -232,6 +232,16 @@ struct paca_struct {
        struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * rfi fallback flush must be in its own cacheline to prevent
+        * other paca data leaking into the L1d
+        */
+       u64 exrfi[EX_SIZE] __aligned(0x80);
+       void *rfi_flush_fallback_area;
+       u64 l1d_flush_congruence;
+       u64 l1d_flush_sets;
+#endif
 };
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
index 7f01b22fa6cb0d0895c914423a98cb2361fb2a16..55eddf50d1498020ba7f17216c689ab89b84f3c7 100644 (file)
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
        return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
 
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+       unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+       if (rc == H_SUCCESS) {
+               p->character = retbuf[0];
+               p->behaviour = retbuf[1];
+       }
+
+       return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
index cf00ec26303aef1415c7d5fcd7425f2149ac0295..469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4 100644 (file)
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */
 
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+       L1D_FLUSH_NONE          = 0x1,
+       L1D_FLUSH_FALLBACK      = 0x2,
+       L1D_FLUSH_ORI           = 0x4,
+       L1D_FLUSH_MTTRIG        = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_SETUP_H */
index 6b958414b4e036ac1e4c97bceb61277ffab65e76..f390d57cf2e1a711335bbd66cf7819e9dcd8442f 100644 (file)
@@ -237,6 +237,11 @@ int main(void)
        OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
        OFFSET(PACA_IN_MCE, paca_struct, in_mce);
        OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+       OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+       OFFSET(PACA_EXRFI, paca_struct, exrfi);
+       OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+       OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
 #endif
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
index 3320bcac71928eeaf85b884076e90767be4d264f..2748584b767da3f5d788c0be27b27662053853e4 100644 (file)
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 /*
  * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        ld      r13,GPR13(r1)   /* only restore r13 if returning to usermode */
+       ld      r2,GPR2(r1)
+       ld      r1,GPR1(r1)
+       mtlr    r4
+       mtcr    r5
+       mtspr   SPRN_SRR0,r7
+       mtspr   SPRN_SRR1,r8
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+
+       /* exit to kernel */
 1:     ld      r2,GPR2(r1)
        ld      r1,GPR1(r1)
        mtlr    r4
        mtcr    r5
        mtspr   SPRN_SRR0,r7
        mtspr   SPRN_SRR1,r8
-       RFI
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 .Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        mtmsrd  r10, 1
        mtspr   SPRN_SRR0, r11
        mtspr   SPRN_SRR1, r12
-
-       rfid
+       RFI_TO_USER
        b       .       /* prevent speculative execution */
 #endif
 _ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
        REST_GPR(13, r1)
-1:
+
        mtspr   SPRN_SRR1,r3
 
        ld      r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ld      r3,GPR3(r1)
        ld      r4,GPR4(r1)
        ld      r1,GPR1(r1)
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
 
-       rfid
+1:     mtspr   SPRN_SRR1,r3
+
+       ld      r2,_CCR(r1)
+       mtcrf   0xFF,r2
+       ld      r2,_NIP(r1)
+       mtspr   SPRN_SRR0,r2
+
+       ld      r0,GPR0(r1)
+       ld      r2,GPR2(r1)
+       ld      r3,GPR3(r1)
+       ld      r4,GPR4(r1)
+       ld      r1,GPR1(r1)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 #endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
        
        mtspr   SPRN_SRR0,r5
        mtspr   SPRN_SRR1,r6
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
        mtspr   SPRN_SRR0,r3
        mtspr   SPRN_SRR1,r4
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
        LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
        andc    r11,r11,r12
        mtsrr1  r11
-       rfid
+       RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */
 
 1:     /* Return from OF */
index e441b469dc8f61f6381c7c95b086b677004d401b..2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d 100644 (file)
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
        LOAD_HANDLER(r12, machine_check_handle_early)
 1:     mtspr   SPRN_SRR0,r12
        mtspr   SPRN_SRR1,r11
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 2:
        /* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
        li      r3,MSR_ME
        andc    r10,r10,r3              /* Turn off MSR_ME */
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 2:
        /*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         */
        bl      machine_check_queue_event
        MACHINE_CHECK_HANDLER_WINDUP
-       rfid
+       RFI_TO_USER_OR_KERNEL
 9:
        /* Deliver the machine check to host kernel in V mode. */
        MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
        stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
        std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
 
+       andi.   r9,r11,MSR_PR   // Check for exception from userspace
+       cmpdi   cr4,r9,MSR_PR   // And save the result in CR4 for later
+
        /*
         * Test MSR_RI before calling slb_allocate_realmode, because the
         * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
        /* All done -- return from exception. */
 
+       bne     cr4,1f          /* returning to kernel */
+
 .machine       push
 .machine       "power4"
        mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
        mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
        mtcrf   0x02,r9         /* I/D indication is in cr6 */
        mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        ld      r11,PACA_EXSLB+EX_R11(r13)
        ld      r12,PACA_EXSLB+EX_R12(r13)
        ld      r13,PACA_EXSLB+EX_R13(r13)
-       rfid
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+1:
+.machine       push
+.machine       "power4"
+       mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
+       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
+       mtcrf   0x02,r9         /* I/D indication is in cr6 */
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+.machine       pop
+
+       RESTORE_CTR(r9, PACA_EXSLB)
+       RESTORE_PPR_PACA(PACA_EXSLB, r9)
+       mr      r3,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
+
 2:     std     r3,PACA_EXSLB+EX_DAR(r13)
        mr      r3,r12
        mfspr   r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 8:     std     r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
        mtspr   SPRN_SRR0,r10 ;                                 \
        ld      r10,PACAKMSR(r13) ;                             \
        mtspr   SPRN_SRR1,r10 ;                                 \
-       rfid ;                                                  \
+       RFI_TO_KERNEL ;                                         \
        b       . ;     /* prevent speculative execution */
 
 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)                              \
        xori    r12,r12,MSR_LE ;                                \
        mtspr   SPRN_SRR1,r12 ;                                 \
        mr      r13,r9 ;                                        \
-       rfid ;          /* return to userspace */               \
+       RFI_TO_USER ;   /* return to userspace */               \
        b       . ;     /* prevent speculative execution */
 #else
 #define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
        mtcr    r11
        REST_GPR(11, r1)
        ld      r1,GPR1(r1)
-       hrfid
+       HRFI_TO_USER_OR_KERNEL
 
 1:     mtcr    r11
        REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        ld      r11,PACA_EXGEN+EX_R11(r13)
        ld      r12,PACA_EXGEN+EX_R12(r13)
        ld      r13,PACA_EXGEN+EX_R13(r13)
-       HRFID
+       HRFI_TO_UNKNOWN
        b       .
 #endif
 
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:                                 \
        ld      r10,PACA_EXGEN+EX_R10(r13);             \
        ld      r11,PACA_EXGEN+EX_R11(r13);             \
        /* returns to kernel where r13 must be set up, so don't restore it */ \
-       ##_H##rfid;                                     \
+       ##_H##RFI_TO_KERNEL;                            \
        b       .;                                      \
        MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load adresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load adresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not
+       add     r8,r8,r11       // Add 0, this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       hrfid
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
        addi    r13, r13, 4
        mtspr   SPRN_SRR0, r13
        GET_SCRATCH0(r13)
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
        addi    r13, r13, 4
        mtspr   SPRN_HSRR0, r13
        GET_SCRATCH0(r13)
-       hrfid
+       HRFI_TO_KERNEL
        b       .
 #endif
 
index 9d213542a48bb91360b7b540ef429473494ebe98..8fd3a70047f1dae91f5e3d11479a178aa64b2ed3 100644 (file)
@@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        unsigned short maj;
        unsigned short min;
 
-       /* We only show online cpus: disable preempt (overzealous, I
-        * knew) to prevent cpu going down. */
-       preempt_disable();
-       if (!cpu_online(cpu_id)) {
-               preempt_enable();
-               return 0;
-       }
-
 #ifdef CONFIG_SMP
        pvr = per_cpu(cpu_pvr, cpu_id);
 #else
@@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #ifdef CONFIG_SMP
        seq_printf(m, "\n");
 #endif
-
-       preempt_enable();
-
        /* If this is the last cpu, print the summary */
        if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
                show_cpuinfo_summary(m);
index 8956a9856604e72634dd11ef416ec9788660bc37..e67413f4a8f0c1fc852c1231f09176c152ed9059 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/memory.h>
 #include <linux/nmi.h>
 
+#include <asm/debugfs.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -801,3 +802,141 @@ static int __init disable_hardlockup_detector(void)
        return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+       pr_info("rfi-flush: disabled on command line.");
+       no_rfi_flush = true;
+       return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+       pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+       handle_no_rfi_flush(NULL);
+       return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+       /*
+        * We don't need to do the flush explicitly, just enter+exit kernel is
+        * sufficient, the RFI exit handlers will do the right thing.
+        */
+}
+
+void rfi_flush_enable(bool enable)
+{
+       if (rfi_flush == enable)
+               return;
+
+       if (enable) {
+               do_rfi_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else
+               do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+       rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+       u64 l1d_size, limit;
+       int cpu;
+
+       l1d_size = ppc64_caches.l1d.size;
+       limit = min(safe_stack_limit(), ppc64_rma_size);
+
+       /*
+        * Align to L1d size, and size it at 2x L1d size, to catch possible
+        * hardware prefetch runoff. We don't have a recipe for load patterns to
+        * reliably avoid the prefetcher.
+        */
+       l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+       memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+       for_each_possible_cpu(cpu) {
+               /*
+                * The fallback flush is currently coded for 8-way
+                * associativity. Different associativity is possible, but it
+                * will be treated as 8-way and may not evict the lines as
+                * effectively.
+                *
+                * 128 byte lines are mandatory.
+                */
+               u64 c = l1d_size / 8;
+
+               paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca[cpu].l1d_flush_congruence = c;
+               paca[cpu].l1d_flush_sets = c / 128;
+       }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+       if (types & L1D_FLUSH_FALLBACK) {
+               pr_info("rfi-flush: Using fallback displacement flush\n");
+               init_fallback_flush();
+       }
+
+       if (types & L1D_FLUSH_ORI)
+               pr_info("rfi-flush: Using ori type flush\n");
+
+       if (types & L1D_FLUSH_MTTRIG)
+               pr_info("rfi-flush: Using mttrig type flush\n");
+
+       enabled_flush_types = types;
+
+       if (!no_rfi_flush)
+               rfi_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+       if (val == 1)
+               rfi_flush_enable(true);
+       else if (val == 0)
+               rfi_flush_enable(false);
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+       *val = rfi_flush ? 1 : 0;
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+       debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+       return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       if (rfi_flush)
+               return sprintf(buf, "Mitigation: RFI Flush\n");
+
+       return sprintf(buf, "Vulnerable\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
index 0494e1566ee2ab9b976cb0d9edb8c195a4490a23..307843d23682a79f0e4d9ae0cdb86a219e0b6e6a 100644 (file)
@@ -132,6 +132,15 @@ SECTIONS
        /* Read-only data */
        RO_DATA(PAGE_SIZE)
 
+#ifdef CONFIG_PPC64
+       . = ALIGN(8);
+       __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+               __start___rfi_flush_fixup = .;
+               *(__rfi_flush_fixup)
+               __stop___rfi_flush_fixup = .;
+       }
+#endif
+
        EXCEPTION_TABLE(0)
 
        NOTES :kernel :notes
index 29ebe2fd58674c59f27803a5426e4d2414f39418..a93d719edc906887b0f4bf412b2b76ac04babfec 100644 (file)
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                gpte->may_read = true;
                gpte->may_write = true;
                gpte->page_size = MMU_PAGE_4K;
+               gpte->wimg = HPTE_R_M;
 
                return 0;
        }
index 966097232d2147bbcd79df41354a5f973fb9b7c1..b73dbc9e797da76bf4305a73972c4af925fbd70d 100644 (file)
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
        u32 order;
 
        /* These fields protected by kvm->lock */
+
+       /* Possible values and their usage:
+        *  <0     an error occurred during allocation,
+        *  -EBUSY allocation is in the progress,
+        *  0      allocation made successfuly.
+        */
        int error;
-       bool prepare_done;
 
-       /* Private to the work thread, until prepare_done is true,
-        * then protected by kvm->resize_hpt_sem */
+       /* Private to the work thread, until error != -EBUSY,
+        * then protected by kvm->lock.
+        */
        struct kvm_hpt_info hpt;
 };
 
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
                 * Reset all the reverse-mapping chains for all memslots
                 */
                kvmppc_rmap_reset(kvm);
-               /* Ensure that each vcpu will flush its TLB on next entry. */
-               cpumask_setall(&kvm->arch.need_tlb_flush);
                err = 0;
                goto out;
        }
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
        kvmppc_set_hpt(kvm, &info);
 
 out:
+       if (err == 0)
+               /* Ensure that each vcpu will flush its TLB on next entry. */
+               cpumask_setall(&kvm->arch.need_tlb_flush);
+
        mutex_unlock(&kvm->lock);
        return err;
 }
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-       BUG_ON(kvm->arch.resize_hpt != resize);
+       if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+               return;
 
        if (!resize)
                return;
 
-       if (resize->hpt.virt)
-               kvmppc_free_hpt(&resize->hpt);
+       if (resize->error != -EBUSY) {
+               if (resize->hpt.virt)
+                       kvmppc_free_hpt(&resize->hpt);
+               kfree(resize);
+       }
 
-       kvm->arch.resize_hpt = NULL;
-       kfree(resize);
+       if (kvm->arch.resize_hpt == resize)
+               kvm->arch.resize_hpt = NULL;
 }
 
 static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
                                                     struct kvm_resize_hpt,
                                                     work);
        struct kvm *kvm = resize->kvm;
-       int err;
+       int err = 0;
 
-       resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
-                        resize->order);
-
-       err = resize_hpt_allocate(resize);
+       if (WARN_ON(resize->error != -EBUSY))
+               return;
 
        mutex_lock(&kvm->lock);
 
+       /* Request is still current? */
+       if (kvm->arch.resize_hpt == resize) {
+               /* We may request large allocations here:
+                * do not sleep with kvm->lock held for a while.
+                */
+               mutex_unlock(&kvm->lock);
+
+               resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+                                resize->order);
+
+               err = resize_hpt_allocate(resize);
+
+               /* We have strict assumption about -EBUSY
+                * when preparing for HPT resize.
+                */
+               if (WARN_ON(err == -EBUSY))
+                       err = -EINPROGRESS;
+
+               mutex_lock(&kvm->lock);
+               /* It is possible that kvm->arch.resize_hpt != resize
+                * after we grab kvm->lock again.
+                */
+       }
+
        resize->error = err;
-       resize->prepare_done = true;
+
+       if (kvm->arch.resize_hpt != resize)
+               resize_hpt_release(kvm, resize);
 
        mutex_unlock(&kvm->lock);
 }
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
        if (resize) {
                if (resize->order == shift) {
-                       /* Suitable resize in progress */
-                       if (resize->prepare_done) {
-                               ret = resize->error;
-                               if (ret != 0)
-                                       resize_hpt_release(kvm, resize);
-                       } else {
+                       /* Suitable resize in progress? */
+                       ret = resize->error;
+                       if (ret == -EBUSY)
                                ret = 100; /* estimated time in ms */
-                       }
+                       else if (ret)
+                               resize_hpt_release(kvm, resize);
 
                        goto out;
                }
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
                ret = -ENOMEM;
                goto out;
        }
+
+       resize->error = -EBUSY;
        resize->order = shift;
        resize->kvm = kvm;
        INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
        if (!resize || (resize->order != shift))
                goto out;
 
-       ret = -EBUSY;
-       if (!resize->prepare_done)
-               goto out;
-
        ret = resize->error;
-       if (ret != 0)
+       if (ret)
                goto out;
 
        ret = resize_hpt_rehash(resize);
-       if (ret != 0)
+       if (ret)
                goto out;
 
        resize_hpt_pivot(resize);
index 2659844784b817d7ca77e6e95ecb9418f430e62d..9c61f736c75b2d0761ec4f9d2e385df75b6dc882 100644 (file)
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
        mtmsrd  r0,1            /* clear RI in MSR */
        mtsrr0  r5
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtmsrd  r6, 1                   /* Clear RI in MSR */
        mtsrr0  r8
        mtsrr1  r7
-       RFI
+       RFI_TO_KERNEL
 
        /* Virtual-mode return */
 .Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
        ld      r0, VCPU_GPR(R0)(r4)
        ld      r4, VCPU_GPR(R4)(r4)
-
-       hrfid
+       HRFI_TO_GUEST
        b       .
 
 secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
        ld      r4, PACAKMSR(r13)
        mtspr   SPRN_SRR0, r3
        mtspr   SPRN_SRR1, r4
-       rfid
+       RFI_TO_KERNEL
 9:     addi    r3, r1, STACK_FRAME_OVERHEAD
        bl      kvmppc_bad_interrupt
        b       9b
index d0dc8624198f8e4663800c4ca603aea98d4ba128..7deaeeb14b9358d8a4e6f1107cd42bba0605a264 100644 (file)
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
 #endif
 
 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
                pte.page_size = MMU_PAGE_64K;
+               pte.wimg = HPTE_R_M;
        }
 
        switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
index 42a4b237df5f5731a4f6964feafc4db2835ffdee..34a5adeff0840662e8eae4553b008069bfd9426d 100644 (file)
@@ -46,6 +46,9 @@
 
 #define FUNC(name)             name
 
+#define RFI_TO_KERNEL  RFI
+#define RFI_TO_GUEST   RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
        GET_SCRATCH0(r13)
 
        /* And get back into the code */
-       RFI
+       RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
        ori     r5, r5, MSR_EE
        mtsrr0  r7
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 #include "book3s_segment.S"
index 2a2b96d5399917398312dacb7c7b2846483a7fd4..93a180ceefad03343d1f9064420ee00ca54973d7 100644 (file)
@@ -156,7 +156,7 @@ no_dcbz32_on:
        PPC_LL  r9, SVCPU_R9(r3)
        PPC_LL  r3, (SVCPU_R3)(r3)
 
-       RFI
+       RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
        cmpwi   r12, BOOK3S_INTERRUPT_DOORBELL
        beqa    BOOK3S_INTERRUPT_DOORBELL
 
-       RFI
+       RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:
index 41cf5ae273cf74a2747d13b9da2274b8eb2054cb..a95ea007d654d5db2b78d811a4ef21a750a95609 100644 (file)
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
        }
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___rfi_flush_fixup),
+       end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       if (types & L1D_FLUSH_FALLBACK)
+               /* b .+16 to fallback flush */
+               instrs[0] = 0x48000010;
+
+       i = 0;
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       for (i = 0; start < end; start++, i++) {
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+               patch_instruction(dest + 1, instrs[1]);
+               patch_instruction(dest + 2, instrs[2]);
+       }
+
+       printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
        long *start, *end;
index 1edfbc1e40f4387b30dd5c0a9491935460feaf92..4fb21e17504aad72295a9e1cdffe54c5547379bd 100644 (file)
 #include <asm/kexec.h>
 #include <asm/smp.h>
 #include <asm/tm.h>
+#include <asm/setup.h>
 
 #include "powernv.h"
 
+static void pnv_setup_rfi_flush(void)
+{
+       struct device_node *np, *fw_features;
+       enum l1d_flush_type type;
+       int enable;
+
+       /* Default to fallback in case fw-features are not available */
+       type = L1D_FLUSH_FALLBACK;
+       enable = 1;
+
+       np = of_find_node_by_name(NULL, "ibm,opal");
+       fw_features = of_get_child_by_name(np, "fw-features");
+       of_node_put(np);
+
+       if (fw_features) {
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_MTTRIG;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_ORI;
+
+               of_node_put(np);
+
+               /* Enable unless firmware says NOT to */
+               enable = 2;
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+               of_node_put(fw_features);
+       }
+
+       setup_rfi_flush(type, enable > 0);
+}
+
 static void __init pnv_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
+       pnv_setup_rfi_flush();
+
        /* Initialize SMP */
        pnv_smp_init();
 
index 6e35780c5962f25144b0181414c03a017840478f..a0b20c03f078cc41b9ad126fbf2603a4ffb68463 100644 (file)
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static CLASS_ATTR_RW(dlpar);
 
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
 {
+       if (pseries_hp_wq)
+               return 0;
+
        pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
-                                       WQ_UNBOUND, 1);
+                       WQ_UNBOUND, 1);
+
+       return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+       int rc;
+
+       rc = dlpar_workqueue_init();
+       if (rc)
+               return rc;
+
        return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
 
index 4470a3194311e06e040c624b8b1491eb0e071298..1ae1d9f4dbe99935130971cdc390d8f581ad788e 100644 (file)
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
        return CMO_PageSize;
 }
 
+int dlpar_workqueue_init(void);
+
 #endif /* _PSERIES_PSERIES_H */
index 4923ffe230cf926565c0ed4c9a339a822b073c15..81d8614e73790b1923a3c8cfd336a2531fee76ce 100644 (file)
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
        /* Hotplug Events */
        np = of_find_node_by_path("/event-sources/hot-plug-events");
        if (np != NULL) {
-               request_event_sources_irqs(np, ras_hotplug_interrupt,
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
                                           "RAS_HOTPLUG");
                of_node_put(np);
        }
index a8531e01265842f39e759bc59aa67f3afd461486..ae4f596273b51a836e5d27307f81bfe6438590bf 100644 (file)
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
        of_pci_check_probe_only();
 }
 
+static void pseries_setup_rfi_flush(void)
+{
+       struct h_cpu_char_result result;
+       enum l1d_flush_type types;
+       bool enable;
+       long rc;
+
+       /* Enable by default */
+       enable = true;
+
+       rc = plpar_get_cpu_characteristics(&result);
+       if (rc == H_SUCCESS) {
+               types = L1D_FLUSH_NONE;
+
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+                       types |= L1D_FLUSH_MTTRIG;
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+                       types |= L1D_FLUSH_ORI;
+
+               /* Use fallback if nothing set in hcall */
+               if (types == L1D_FLUSH_NONE)
+                       types = L1D_FLUSH_FALLBACK;
+
+               if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+                       enable = false;
+       } else {
+               /* Default to fallback if case hcall is not available */
+               types = L1D_FLUSH_FALLBACK;
+       }
+
+       setup_rfi_flush(types, enable);
+}
+
 static void __init pSeries_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
        fwnmi_init();
 
+       pseries_setup_rfi_flush();
+
        /* By default, only probe PCI (can be overridden by rtas_pci) */
        pci_add_flags(PCI_PROBE_ONLY);
 
index cab24f549e7cbdc8786c5fa5b7450f0dcda8d87c..0ddc7ac6c5f13df2dd688a0dee38bfb53a2cdf80 100644 (file)
@@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu)
        DUMP(p, kernel_toc, "lx");
        DUMP(p, kernelbase, "lx");
        DUMP(p, kernel_msr, "lx");
-       DUMP(p, emergency_sp, "p");
+       DUMP(p, emergency_sp, "px");
 #ifdef CONFIG_PPC_BOOK3S_64
-       DUMP(p, nmi_emergency_sp, "p");
-       DUMP(p, mc_emergency_sp, "p");
+       DUMP(p, nmi_emergency_sp, "px");
+       DUMP(p, mc_emergency_sp, "px");
        DUMP(p, in_nmi, "x");
        DUMP(p, in_mce, "x");
        DUMP(p, hmi_event_available, "x");
@@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu)
        DUMP(p, slb_cache_ptr, "x");
        for (i = 0; i < SLB_CACHE_ENTRIES; i++)
                printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]);
+
+       DUMP(p, rfi_flush_fallback_area, "px");
+       DUMP(p, l1d_flush_congruence, "llx");
+       DUMP(p, l1d_flush_sets, "llx");
 #endif
        DUMP(p, dscr_default, "llx");
 #ifdef CONFIG_PPC_BOOK3E
-       DUMP(p, pgd, "p");
-       DUMP(p, kernel_pgd, "p");
-       DUMP(p, tcd_ptr, "p");
-       DUMP(p, mc_kstack, "p");
-       DUMP(p, crit_kstack, "p");
-       DUMP(p, dbg_kstack, "p");
+       DUMP(p, pgd, "px");
+       DUMP(p, kernel_pgd, "px");
+       DUMP(p, tcd_ptr, "px");
+       DUMP(p, mc_kstack, "px");
+       DUMP(p, crit_kstack, "px");
+       DUMP(p, dbg_kstack, "px");
 #endif
-       DUMP(p, __current, "p");
+       DUMP(p, __current, "px");
        DUMP(p, kstack, "lx");
        printf(" kstack_base          = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
        DUMP(p, stab_rr, "lx");
@@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu)
 #endif
 
 #ifdef CONFIG_PPC_POWERNV
-       DUMP(p, core_idle_state_ptr, "p");
+       DUMP(p, core_idle_state_ptr, "px");
        DUMP(p, thread_idle_state, "x");
        DUMP(p, thread_mask, "x");
        DUMP(p, subcore_sibling_mask, "x");
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..47dacf06c679f3eff22c0a6ab0f619d9b4019ab5 100644 (file)
@@ -0,0 +1,75 @@
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RAS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+# CONFIG_RCU_TRACE is not set
+CONFIG_CRYPTO_USER_API_HASH=y
index 0d64bc9f4f91562872b36231e5b593887498cde2..3c7a2c97e377a5af2923c8e7d96cf04e57e66892 100644 (file)
 #include <linux/const.h>
 
 /* Status register flags */
-#define SR_IE   _AC(0x00000002, UL) /* Interrupt Enable */
-#define SR_PIE  _AC(0x00000020, UL) /* Previous IE */
-#define SR_PS   _AC(0x00000100, UL) /* Previously Supervisor */
-#define SR_SUM  _AC(0x00040000, UL) /* Supervisor may access User Memory */
+#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_SPIE        _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
 
 #define SR_FS           _AC(0x00006000, UL) /* Floating-point Status */
 #define SR_FS_OFF       _AC(0x00000000, UL)
index a82ce599b639813c9ed3ad697f217cb09a6538e1..b269451e7e85577c76cb718283806fdfb6f76532 100644 (file)
@@ -21,8 +21,6 @@
 
 #include <linux/types.h>
 
-#ifdef CONFIG_MMU
-
 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 /*
@@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 extern void iounmap(volatile void __iomem *addr);
 
-#endif /* CONFIG_MMU */
-
 /* Generic IO read/write.  These perform native-endian accesses. */
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
index 6fdc860d7f84fd69648af547ff8984946eac93fd..07a3c6d5706ff8fd8f34acbe6c5832fc6e638676 100644 (file)
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
 /* unconditionally enable interrupts */
 static inline void arch_local_irq_enable(void)
 {
-       csr_set(sstatus, SR_IE);
+       csr_set(sstatus, SR_SIE);
 }
 
 /* unconditionally disable interrupts */
 static inline void arch_local_irq_disable(void)
 {
-       csr_clear(sstatus, SR_IE);
+       csr_clear(sstatus, SR_SIE);
 }
 
 /* get status and disable interrupts */
 static inline unsigned long arch_local_irq_save(void)
 {
-       return csr_read_clear(sstatus, SR_IE);
+       return csr_read_clear(sstatus, SR_SIE);
 }
 
 /* test flags */
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & SR_IE);
+       return !(flags & SR_SIE);
 }
 
 /* test hardware interrupt enable bit */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
 /* set interrupt enabled status */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-       csr_set(sstatus, flags & SR_IE);
+       csr_set(sstatus, flags & SR_SIE);
 }
 
 #endif /* _ASM_RISCV_IRQFLAGS_H */
index 2cbd92ed1629c00df42b8ceaffd2250b6de7413a..16301966d65b6fd8a54614d12f0815866d19d948 100644 (file)
@@ -20,8 +20,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_MMU
-
 /* Page Upper Directory not used in RISC-V */
 #include <asm-generic/pgtable-nopud.h>
 #include <asm/page.h>
@@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void)
        /* No page table caches to initialize */
 }
 
-#endif /* CONFIG_MMU */
-
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
index 93b8956e25e46714a5bd789ed0ea15ebb2a687f2..2c5df945d43c9abfdfd197a61d8c92ce20e48133 100644 (file)
@@ -66,7 +66,7 @@ struct pt_regs {
 #define REG_FMT "%08lx"
 #endif
 
-#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0)
+#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
 
 
 /* Helpers for working with the instruction pointer */
index 715b0f10af580811dfca3ba067819e07fe1e7374..7b9c24ebdf5293ac73b8155b2c394fc9244a712d 100644 (file)
@@ -15,8 +15,6 @@
 #ifndef _ASM_RISCV_TLBFLUSH_H
 #define _ASM_RISCV_TLBFLUSH_H
 
-#ifdef CONFIG_MMU
-
 #include <linux/mm_types.h>
 
 /*
@@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
        flush_tlb_all();
 }
 
-#endif /* CONFIG_MMU */
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
index 27b90d64814b8d0422d0316f8e765b6e97081a47..14b0b22fb57875dfe920685265a79da317c517e0 100644 (file)
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
  * call.
  */
 
-#ifdef CONFIG_MMU
 #define __get_user_asm(insn, x, ptr, err)                      \
 do {                                                           \
        uintptr_t __tmp;                                        \
@@ -153,13 +152,11 @@ do {                                                              \
        __disable_user_access();                                \
        (x) = __x;                                              \
 } while (0)
-#endif /* CONFIG_MMU */
 
 #ifdef CONFIG_64BIT
 #define __get_user_8(x, ptr, err) \
        __get_user_asm("ld", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __get_user_8(x, ptr, err)                              \
 do {                                                           \
        u32 __user *__ptr = (u32 __user *)(ptr);                \
@@ -193,7 +190,6 @@ do {                                                                \
        (x) = (__typeof__(x))((__typeof__((x)-(x)))(            \
                (((u64)__hi << 32) | __lo)));                   \
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -267,8 +263,6 @@ do {                                                                \
                ((x) = 0, -EFAULT);                             \
 })
 
-
-#ifdef CONFIG_MMU
 #define __put_user_asm(insn, x, ptr, err)                      \
 do {                                                           \
        uintptr_t __tmp;                                        \
@@ -292,14 +286,11 @@ do {                                                              \
                : "rJ" (__x), "i" (-EFAULT));                   \
        __disable_user_access();                                \
 } while (0)
-#endif /* CONFIG_MMU */
-
 
 #ifdef CONFIG_64BIT
 #define __put_user_8(x, ptr, err) \
        __put_user_asm("sd", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __put_user_8(x, ptr, err)                              \
 do {                                                           \
        u32 __user *__ptr = (u32 __user *)(ptr);                \
@@ -329,7 +320,6 @@ do {                                                                \
                : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
        __disable_user_access();                                \
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
  * will set "err" to -EFAULT, while successful accesses return the previous
  * value.
  */
-#ifdef CONFIG_MMU
 #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb)     \
 ({                                                             \
        __typeof__(ptr) __ptr = (ptr);                          \
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
        (err) = __err;                                          \
        __ret;                                                  \
 })
-#endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_UACCESS_H */
index 9f250ed007cd816e523898a10f32ec6814da6892..2f704a5c4196e30fd2a9a9c3607eeeed3d6f019f 100644 (file)
@@ -14,3 +14,4 @@
 #define __ARCH_HAVE_MMU
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
+#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h
deleted file mode 100644 (file)
index a2ccf18..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2017 SiFive
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
-#define _ASM_RISCV_VDSO_SYSCALLS_H
-
-#ifdef CONFIG_SMP
-
-/* These syscalls are only used by the vDSO and are not in the uapi. */
-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
-
-#endif
-
-#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h
new file mode 100644 (file)
index 0000000..818655b
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2017 SiFive
+ */
+
+#ifndef _ASM__UAPI__SYSCALLS_H
+#define _ASM__UAPI__SYSCALLS_H
+
+/*
+ * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart.  There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller.  We don't currently do anything with the address range, that's just
+ * in there for forwards compatibility.
+ */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
index 20ee86f782a93b19779236f39a40daa3d9563ac5..7404ec222406290ed03c671a3b4463c61aa49c12 100644 (file)
@@ -196,7 +196,7 @@ handle_syscall:
        addi s2, s2, 0x4
        REG_S s2, PT_SEPC(sp)
        /* System calls run with interrupts enabled */
-       csrs sstatus, SR_IE
+       csrs sstatus, SR_SIE
        /* Trace syscalls, but only if requested by the user. */
        REG_L t0, TASK_TI_FLAGS(tp)
        andi t0, t0, _TIF_SYSCALL_TRACE
@@ -224,8 +224,8 @@ ret_from_syscall:
 
 ret_from_exception:
        REG_L s0, PT_SSTATUS(sp)
-       csrc sstatus, SR_IE
-       andi s0, s0, SR_PS
+       csrc sstatus, SR_SIE
+       andi s0, s0, SR_SPP
        bnez s0, restore_all
 
 resume_userspace:
@@ -255,7 +255,7 @@ work_pending:
        bnez s1, work_resched
 work_notifysig:
        /* Handle pending signals and notify-resume requests */
-       csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */
+       csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
        move a0, sp /* pt_regs */
        move a1, s0 /* current_thread_info->flags */
        tail do_notify_resume
index 0d90dcc1fbd36559823e48db2dc15ac4319b063d..d74d4adf2d54f94a4dff39c14d7953b8c0fdbab8 100644 (file)
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
 void start_thread(struct pt_regs *regs, unsigned long pc,
        unsigned long sp)
 {
-       regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL;
+       regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
        regs->sepc = pc;
        regs->sp = sp;
        set_fs(USER_DS);
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                const register unsigned long gp __asm__ ("gp");
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->gp = gp;
-               childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */
+               childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
 
                p->thread.ra = (unsigned long)ret_from_kernel_thread;
                p->thread.s[0] = usp; /* fn */
index a5bd6401f95e6988a376406f02b7e39cb7c1352e..ade52b903a43f2b27546fdd216b87b7057bc95d4 100644 (file)
@@ -23,5 +23,4 @@
 void *sys_call_table[__NR_syscalls] = {
        [0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 };
index b0fbad74e873ea9fff553424b975502b589d7097..023e4d4aef588e139fafcbc9e3fb1ddef0482575 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/linkage.h>
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 
        .text
 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
index df2ca3c65048f9347781b05309c9552eaa7d0844..0713f3c67ab4242a2e54430331044724bf57f289 100644 (file)
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
                goto vmalloc_fault;
 
        /* Enable interrupts if they were enabled in the parent context. */
-       if (likely(regs->sstatus & SR_PIE))
+       if (likely(regs->sstatus & SR_SPIE))
                local_irq_enable();
 
        /*
index 77c35350ee774d4fa26ed5bc57b70d9d03c7bcab..412326d59e6fcebd51469163f6476b88753aaa4c 100644 (file)
@@ -9,6 +9,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/sh_eth.h>
 #include <mach-se/mach/se.h>
 #include <mach-se/mach/mrshpc.h>
 #include <asm/machvec.h>
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
 #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
        defined(CONFIG_CPU_SUBTYPE_SH7712)
 /* SH771X Ethernet driver */
+static struct sh_eth_plat_data sh_eth_plat = {
+       .phy = PHY_ID,
+       .phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
 static struct resource sh_eth0_resources[] = {
        [0] = {
                .start = SH_ETH0_BASE,
-               .end = SH_ETH0_BASE + 0x1B8,
+               .end = SH_ETH0_BASE + 0x1B8 - 1,
                .flags = IORESOURCE_MEM,
        },
        [1] = {
+               .start = SH_TSU_BASE,
+               .end = SH_TSU_BASE + 0x200 - 1,
+               .flags = IORESOURCE_MEM,
+       },
+       [2] = {
                .start = SH_ETH0_IRQ,
                .end = SH_ETH0_IRQ,
                .flags = IORESOURCE_IRQ,
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
        .name = "sh771x-ether",
        .id = 0,
        .dev = {
-               .platform_data = PHY_ID,
+               .platform_data = &sh_eth_plat,
        },
        .num_resources = ARRAY_SIZE(sh_eth0_resources),
        .resource = sh_eth0_resources,
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
 static struct resource sh_eth1_resources[] = {
        [0] = {
                .start = SH_ETH1_BASE,
-               .end = SH_ETH1_BASE + 0x1B8,
+               .end = SH_ETH1_BASE + 0x1B8 - 1,
                .flags = IORESOURCE_MEM,
        },
        [1] = {
+               .start = SH_TSU_BASE,
+               .end = SH_TSU_BASE + 0x200 - 1,
+               .flags = IORESOURCE_MEM,
+       },
+       [2] = {
                .start = SH_ETH1_IRQ,
                .end = SH_ETH1_IRQ,
                .flags = IORESOURCE_IRQ,
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
        .name = "sh771x-ether",
        .id = 1,
        .dev = {
-               .platform_data = PHY_ID,
+               .platform_data = &sh_eth_plat,
        },
        .num_resources = ARRAY_SIZE(sh_eth1_resources),
        .resource = sh_eth1_resources,
index 4246ef9b07a346ed590be0cd3651f5a370ee700a..aa83fe1ff0b124c002e375e2ce5a83c7c853e29c 100644 (file)
 /* Base address */
 #define SH_ETH0_BASE 0xA7000000
 #define SH_ETH1_BASE 0xA7000400
+#define SH_TSU_BASE  0xA7000800
 /* PHY ID */
 #if defined(CONFIG_CPU_SUBTYPE_SH7710)
 # define PHY_ID 0x00
index d4fc98c50378c40bc901f6446d2bfff68151eb6a..20da391b5f329885e26b30e0351d73d571d28fc1 100644 (file)
@@ -55,7 +55,6 @@ config X86
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
        select ARCH_HAS_PMEM_API                if X86_64
-       # Causing hangs/crashes, see the commit that added this change for details.
        select ARCH_HAS_REFCOUNT
        select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
        select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
        select GENERIC_CLOCKEVENTS_MIN_ADJUST
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
        select GENERIC_FIND_FIRST_BIT
        select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
+config RETPOLINE
+       bool "Avoid speculative indirect branches in kernel"
+       default y
+       help
+         Compile kernel with the retpoline compiler options to guard against
+         kernel-to-user data leaks by avoiding speculative indirect
+         branches. Requires a compiler with -mindirect-branch=thunk-extern
+         support for full protection. The kernel may run slower.
+
+         Without compiler support, at least indirect branches in assembler
+         code are eliminated. Since this includes the syscall entry path,
+         it is not entirely pointless.
+
 config INTEL_RDT
        bool "Intel Resource Director Technology support"
        default n
index 3e73bc255e4eb3edda965a6bc493e0e6ab8de354..fad55160dcb94a28e60d537d3d69d471a1e10e2e 100644 (file)
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
        $(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
index 16627fec80b26b211ba3c4b50c3850c134e2bd62..3d09e3aca18dad33ba0c27c3673e49dcbfac0ce8 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
        pxor INC, STATE4
        movdqu IV, 0x30(OUTP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x00(OUTP), INC
        pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
        _aesni_gf128mul_x_ble()
        movups IV, (IVP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x40(OUTP), INC
        pxor INC, STATE1
index f7c495e2863cb0daecb7b023e19e393cfc32c587..a14af6eb09cb074a1dcf42d0adaaf1c388ceb30b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
        vpxor 14 * 16(%rax), %xmm15, %xmm14;
        vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 16), %rsp;
 
index eee5b3982cfd3c6838a9a34534409df8935eba17..b66bbfa62f50d7c05ddb29b57a322979f64a6999 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
        vpxor 14 * 32(%rax), %ymm15, %ymm14;
        vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 32), %rsp;
 
index 7a7de27c6f415206f2c3b8e5a8a43d4468bc7aaa..d9b734d0c8cc78ecdc3293ca117546eb538a12fc 100644 (file)
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
        movzxw  (bufp, %rax, 2), len
        lea     crc_array(%rip), bufp
        lea     (bufp, len, 1), bufp
-       jmp     *bufp
+       JMP_NOSPEC bufp
 
        ################################################################
        ## 2a) PROCESS FULL BLOCKS:
index 45a63e00a6af9a12b4739246d6844ba94f766e71..3f48f695d5e6ac6546a009c734fcac517564b24d 100644 (file)
@@ -198,8 +198,11 @@ For 32-bit we have the following conventions - kernel is built with
  * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
  * halves:
  */
-#define PTI_SWITCH_PGTABLES_MASK       (1<<PAGE_SHIFT)
-#define PTI_SWITCH_MASK                (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+#define PTI_USER_PGTABLE_BIT           PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK          (1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT              X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK             (1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
 
 .macro SET_NOFLUSH_BIT reg:req
        bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
@@ -208,7 +211,7 @@ For 32-bit we have the following conventions - kernel is built with
 .macro ADJUST_KERNEL_CR3 reg:req
        ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
        /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
-       andq    $(~PTI_SWITCH_MASK), \reg
+       andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
 .endm
 
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
@@ -239,15 +242,19 @@ For 32-bit we have the following conventions - kernel is built with
        /* Flush needed, clear the bit */
        btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
        movq    \scratch_reg2, \scratch_reg
-       jmp     .Lwrcr3_\@
+       jmp     .Lwrcr3_pcid_\@
 
 .Lnoflush_\@:
        movq    \scratch_reg2, \scratch_reg
        SET_NOFLUSH_BIT \scratch_reg
 
+.Lwrcr3_pcid_\@:
+       /* Flip the ASID to the user version */
+       orq     $(PTI_USER_PCID_MASK), \scratch_reg
+
 .Lwrcr3_\@:
-       /* Flip the PGD and ASID to the user version */
-       orq     $(PTI_SWITCH_MASK), \scratch_reg
+       /* Flip the PGD to the user version */
+       orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
        mov     \scratch_reg, %cr3
 .Lend_\@:
 .endm
@@ -263,17 +270,12 @@ For 32-bit we have the following conventions - kernel is built with
        movq    %cr3, \scratch_reg
        movq    \scratch_reg, \save_reg
        /*
-        * Is the "switch mask" all zero?  That means that both of
-        * these are zero:
-        *
-        *      1. The user/kernel PCID bit, and
-        *      2. The user/kernel "bit" that points CR3 to the
-        *         bottom half of the 8k PGD
-        *
-        * That indicates a kernel CR3 value, not a user CR3.
+        * Test the user pagetable bit. If set, then the user page tables
+        * are active. If clear CR3 already has the kernel page table
+        * active.
         */
-       testq   $(PTI_SWITCH_MASK), \scratch_reg
-       j     .Ldone_\@
+       bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
+       jnc     .Ldone_\@
 
        ADJUST_KERNEL_CR3 \scratch_reg
        movq    \scratch_reg, %cr3
@@ -290,7 +292,7 @@ For 32-bit we have the following conventions - kernel is built with
         * KERNEL pages can always resume with NOFLUSH as we do
         * explicit flushes.
         */
-       bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
+       bt      $PTI_USER_PGTABLE_BIT, \save_reg
        jnc     .Lnoflush_\@
 
        /*
index ace8f321a5a1f2d1331cc4331a1922c9ed3d8bc1..60c4c342316cdf75263e9b13913c65bd7c5838f1 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
        .section .entry.text, "ax"
 
@@ -243,6 +244,17 @@ ENTRY(__switch_to_asm)
        movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+        * with userspace addresses. On CPUs where those concerns
+        * exist, overwrite the RSB with entries which capture
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
        /* restore callee-saved registers */
        popl    %esi
        popl    %edi
@@ -290,7 +302,7 @@ ENTRY(ret_from_fork)
 
        /* kernel thread */
 1:     movl    %edi, %eax
-       call    *%ebx
+       CALL_NOSPEC %ebx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +931,7 @@ common_exception:
        movl    %ecx, %es
        TRACE_IRQS_OFF
        movl    %esp, %eax                      # pt_regs pointer
-       call    *%edi
+       CALL_NOSPEC %edi
        jmp     ret_from_exception
 END(common_exception)
 
index f048e384ff54e06530b657efc3b00cdf50f1ce5b..aa15b4c0e3d17dd64f513ce6a1915cdd24829016 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -191,7 +192,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
         */
        pushq   %rdi
        movq    $entry_SYSCALL_64_stage2, %rdi
-       jmp     *%rdi
+       JMP_NOSPEC %rdi
 END(entry_SYSCALL_64_trampoline)
 
        .popsection
@@ -270,7 +271,12 @@ entry_SYSCALL_64_fastpath:
         * It might end up jumping to the slow path.  If it jumps, RAX
         * and all argument registers are clobbered.
         */
+#ifdef CONFIG_RETPOLINE
+       movq    sys_call_table(, %rax, 8), %rax
+       call    __x86_indirect_thunk_rax
+#else
        call    *sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
        movq    %rax, RAX(%rsp)
@@ -442,7 +448,7 @@ ENTRY(stub_ptregs_64)
        jmp     entry_SYSCALL64_slow_path
 
 1:
-       jmp     *%rax                           /* Called from C */
+       JMP_NOSPEC %rax                         /* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -485,6 +491,17 @@ ENTRY(__switch_to_asm)
        movq    %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+       /*
+        * When switching from a shallower to a deeper call stack
+        * the RSB may either underflow or use entries populated
+        * with userspace addresses. On CPUs where those concerns
+        * exist, overwrite the RSB with entries which capture
+        * speculative execution to prevent attack.
+        */
+       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
        /* restore callee-saved registers */
        popq    %r15
        popq    %r14
@@ -521,7 +538,7 @@ ENTRY(ret_from_fork)
 1:
        /* kernel thread */
        movq    %r12, %rdi
-       call    *%rbx
+       CALL_NOSPEC %rbx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
index 141e07b0621689e745582599c009f4af1d053c6c..24ffa1e88cf948ecbe9839f6d956c69a12ecd4f0 100644 (file)
@@ -582,6 +582,24 @@ static __init int bts_init(void)
        if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
                return -ENODEV;
 
+       if (boot_cpu_has(X86_FEATURE_PTI)) {
+               /*
+                * BTS hardware writes through a virtual memory map we must
+                * either use the kernel physical map, or the user mapping of
+                * the AUX buffer.
+                *
+                * However, since this driver supports per-CPU and per-task inherit
+                * we cannot use the user mapping since it will not be availble
+                * if we're not running the owning process.
+                *
+                * With PTI we can't use the kernal map either, because its not
+                * there when we run userspace.
+                *
+                * For now, disable this driver when using PTI.
+                */
+               return -ENODEV;
+       }
+
        bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
                                  PERF_PMU_CAP_EXCLUSIVE;
        bts_pmu.task_ctx_nr     = perf_sw_context;
index 005908ee9333f0e87cdd4db7ca14a278d52fa773..a2efb490f7435d137385c8dec0105f62ebd46264 100644 (file)
@@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init),
-       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X,    hsx_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT,  hsw_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE,   hsw_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E,   hsw_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,      hsx_rapl_init),
-       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
index a9e57f08bfa641d4cc9fb736bf2d1d4d22a6640c..98722773391db638438e22c2ef41e257b0f1f3fb 100644 (file)
@@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
 extern void sync_Arb_IDs(void);
+extern void init_bsp_APIC(void);
 extern void apic_intr_mode_init(void);
 extern void setup_local_APIC(void);
 extern void init_apic_mappings(void);
index ff700d81e91efcf89a2f2ac725b3e80d868c8621..0927cdc4f9460165a9159b1c4f7bea0e759bdbb9 100644 (file)
 #include <asm/pgtable.h>
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
+#include <asm/asm.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 #endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
index 21ac898df2d8e478b6e214f3e816fde937ecfd03..25b9375c1484b48f37c5e8f1d7fb286203491d1d 100644 (file)
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
 #define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE          ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD      ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN         ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_INTEL_PT           ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW      ( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_FEATURE_AVX512_4FMAPS      ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA                        ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW          ( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW         ( 8*32+ 0) /* Intel TPR Shadow */
 #define X86_FEATURE_AVX512IFMA         ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT         ( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB               ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT           ( 9*32+25) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512PF           ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER           ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD           ( 9*32+28) /* AVX-512 Conflict Detection */
 #define X86_BUG_MONITOR                        X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400               X86_BUG(13) /* CPU is among the affected by Erratum 400 */
 #define X86_BUG_CPU_MELTDOWN           X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1             X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2             X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
index c9459a4c3c680b754365cd8531b5c25f51b5fb54..22c5f3e6f8201aa03cbbbcd43e37e6bc30de6a35 100644 (file)
@@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data);
 
 void __init sme_early_init(void);
 
-void __init sme_encrypt_kernel(void);
+void __init sme_encrypt_kernel(struct boot_params *bp);
 void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
@@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
 
 static inline void __init sme_early_init(void) { }
 
-static inline void __init sme_encrypt_kernel(void) { }
+static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
 static inline void __init sme_enable(struct boot_params *bp) { }
 
 static inline bool sme_active(void) { return false; }
index 5400add2885b9646fa7d8d53c5b35e16c2794c7d..8bf450b13d9f53883db9ee89ac6d39913963907e 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/nmi.h>
 #include <asm/io.h>
 #include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
                return U64_MAX;
 
        __asm__ __volatile__("mov %4, %%r8\n"
-                            "call *%5"
+                            CALL_NOSPEC
                             : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                               "+c" (control), "+d" (input_address)
-                            :  "r" (output_address), "m" (hv_hypercall_pg)
+                            :  "r" (output_address),
+                               THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory", "r8", "r9", "r10", "r11");
 #else
        u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        if (!hv_hypercall_pg)
                return U64_MAX;
 
-       __asm__ __volatile__("call *%7"
+       __asm__ __volatile__(CALL_NOSPEC
                             : "=A" (hv_status),
                               "+c" (input_address_lo), ASM_CALL_CONSTRAINT
                             : "A" (control),
                               "b" (input_address_hi),
                               "D"(output_address_hi), "S"(output_address_lo),
-                              "m" (hv_hypercall_pg)
+                              THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory");
 #endif /* !x86_64 */
        return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
 #ifdef CONFIG_X86_64
        {
-               __asm__ __volatile__("call *%4"
+               __asm__ __volatile__(CALL_NOSPEC
                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                       "+c" (control), "+d" (input1)
-                                    : "m" (hv_hypercall_pg)
+                                    : THUNK_TARGET(hv_hypercall_pg)
                                     : "cc", "r8", "r9", "r10", "r11");
        }
 #else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
                u32 input1_hi = upper_32_bits(input1);
                u32 input1_lo = lower_32_bits(input1);
 
-               __asm__ __volatile__ ("call *%5"
+               __asm__ __volatile__ (CALL_NOSPEC
                                      : "=A"(hv_status),
                                        "+c"(input1_lo),
                                        ASM_CALL_CONSTRAINT
                                      : "A" (control),
                                        "b" (input1_hi),
-                                       "m" (hv_hypercall_pg)
+                                       THUNK_TARGET(hv_hypercall_pg)
                                      : "cc", "edi", "esi");
        }
 #endif
index 34c4922bbc3fb5ed95cb0819d7ac6b9cd37a7f41..e7b983a355060a6c5b5db76f1fc18475eb647ae5 100644 (file)
 #define FAM10H_MMIO_CONF_BASE_MASK     0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT    20
 #define MSR_FAM10H_NODE_ID             0xc001100c
+#define MSR_F10H_DECFG                 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT    1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE                BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
 
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1                        0xc001001a
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644 (file)
index 0000000..7b45d84
--- /dev/null
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
+       mov     $(nr/2), reg;                   \
+771:                                           \
+       call    772f;                           \
+773:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     773b;                           \
+772:                                           \
+       call    774f;                           \
+775:   /* speculation trap */                  \
+       pause;                                  \
+       lfence;                                 \
+       jmp     775b;                           \
+774:                                           \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative.  It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+       .Lannotate_\@:
+       .pushsection .discard.nospec
+       .long .Lannotate_\@ - .
+       .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+       call    .Ldo_rop_\@
+.Lspec_trap_\@:
+       pause
+       lfence
+       jmp     .Lspec_trap_\@
+.Ldo_rop_\@:
+       mov     \reg, (%_ASM_SP)
+       ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+       jmp     .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+       RETPOLINE_JMP \reg
+.Ldo_call_\@:
+       call    .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+               __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       jmp     *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(call *\reg),                          \
+               __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       call    *\reg
+#endif
+.endm
+
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE                            \
+       "999:\n\t"                                              \
+       ".pushsection .discard.nospec\n\t"                      \
+       ".long 999b - .\n\t"                                    \
+       ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC                                           \
+       ANNOTATE_NOSPEC_ALTERNATIVE                             \
+       ALTERNATIVE(                                            \
+       "call *%[thunk_target]\n",                              \
+       "call __x86_indirect_thunk_%V[thunk_target]\n",         \
+       X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",    \
+       "       jmp    904f;\n"                                 \
+       "       .align 16\n"                                    \
+       "901:   call   903f;\n"                                 \
+       "902:   pause;\n"                                       \
+       "       lfence;\n"                                      \
+       "       jmp    902b;\n"                                 \
+       "       .align 16\n"                                    \
+       "903:   addl   $4, %%esp;\n"                            \
+       "       pushl  %[thunk_target];\n"                      \
+       "       ret;\n"                                         \
+       "       .align 16\n"                                    \
+       "904:   call   901b;\n",                                \
+       X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+       SPECTRE_V2_NONE,
+       SPECTRE_V2_RETPOLINE_MINIMAL,
+       SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+       SPECTRE_V2_RETPOLINE_GENERIC,
+       SPECTRE_V2_RETPOLINE_AMD,
+       SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+       unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=&r" (loops), ASM_CALL_CONSTRAINT
+                     : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
index 7a5d6695abd37e737bfb0e532ec4d86c53256433..eb66fa9cd0fc6187d55dc0c1a8fb30e5e32687a8 100644 (file)
@@ -38,6 +38,7 @@ do {                                          \
 #define PCI_NOASSIGN_ROMS      0x80000
 #define PCI_ROOT_NO_CRS                0x100000
 #define PCI_NOASSIGN_BARS      0x200000
+#define PCI_BIG_ROOT_WINDOW    0x400000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
index 6a60fea90b9d9dd669033e4d7eee627b99a0c3d5..625a52a5594f53b3ddc889843b92d228cc39a8e8 100644 (file)
@@ -40,7 +40,7 @@
 #define CR3_NOFLUSH    BIT_ULL(63)
 
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-# define X86_CR3_PTI_SWITCH_BIT        11
+# define X86_CR3_PTI_PCID_USER_BIT     11
 #endif
 
 #else
index 4a08dd2ab32ade77dcbfff6a8e8b77f5d5e43764..d33e4a26dc7ed2db51687ddb4ca7ee3a35ddf835 100644 (file)
@@ -81,13 +81,13 @@ static inline u16 kern_pcid(u16 asid)
         * Make sure that the dynamic ASID space does not confict with the
         * bit we are using to switch between user and kernel ASIDs.
         */
-       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
 
        /*
         * The ASID being passed in here should have respected the
         * MAX_ASID_AVAILABLE and thus never have the switch bit set.
         */
-       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
 #endif
        /*
         * The dynamically-assigned ASIDs that get passed in are small
@@ -112,7 +112,7 @@ static inline u16 user_pcid(u16 asid)
 {
        u16 ret = kern_pcid(asid);
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
-       ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+       ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
 #endif
        return ret;
 }
index 7cb282e9e58777aeb2ffd86b344d39a5189c5f84..bfd882617613923f0db79c4aae82690bda7a8d4f 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
        __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
        stac();
-       asm volatile("call *%[call]"
+       asm volatile(CALL_NOSPEC
                     : __HYPERCALL_5PARAM
-                    : [call] "a" (&hypercall_page[call])
+                    : [thunk_target] "a" (&hypercall_page[call])
                     : __HYPERCALL_CLOBBER5);
        clac();
 
index dbaf14d69ebd510b86a4a0e9e83bbd1862d99d14..4817d743c26359c697559053a23518a5e25b241b 100644 (file)
@@ -344,9 +344,12 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
        unsigned long flags;
+       int i;
 
-       if (instr[0] != 0x90)
-               return;
+       for (i = 0; i < a->padlen; i++) {
+               if (instr[i] != 0x90)
+                       return;
+       }
 
        local_irq_save(flags);
        add_nops(instr + (a->instrlen - a->padlen), a->padlen);
index 880441f2414610298002c34652b96e571772e08f..25ddf02598d20a89cb1da2243aba687e6eee7657 100644 (file)
@@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void)
        return APIC_SYMMETRIC_IO;
 }
 
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+       unsigned int value;
+
+       /*
+        * Don't do the setup now if we have a SMP BIOS as the
+        * through-I/O-APIC virtual wire mode might be active.
+        */
+       if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
+               return;
+
+       /*
+        * Do not trust the local APIC being empty at bootup.
+        */
+       clear_local_APIC();
+
+       /*
+        * Enable APIC.
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+       /* This bit is reserved on P4/Xeon and should be cleared */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+           (boot_cpu_data.x86 == 15))
+               value &= ~APIC_SPIV_FOCUS_DISABLED;
+       else
+#endif
+               value |= APIC_SPIV_FOCUS_DISABLED;
+       value |= SPURIOUS_APIC_VECTOR;
+       apic_write(APIC_SPIV, value);
+
+       /*
+        * Set up the virtual wire mode.
+        */
+       apic_write(APIC_LVT0, APIC_DM_EXTINT);
+       value = APIC_DM_NMI;
+       if (!lapic_is_integrated())             /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
+       if (apic_extnmi == APIC_EXTNMI_NONE)
+               value |= APIC_LVT_MASKED;
+       apic_write(APIC_LVT1, value);
+}
+
 /* Init the interrupt delivery mode for the BSP */
 void __init apic_intr_mode_init(void)
 {
index f8b03bb8e72560a436dbad677ddeed30f19531cf..3cc471beb50b499d89148bbdee37c697c596f743 100644 (file)
@@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 
                err = assign_irq_vector_policy(irqd, info);
                trace_vector_setup(virq + i, false, err);
-               if (err)
+               if (err) {
+                       irqd->chip_data = NULL;
+                       free_apic_chip_data(apicd);
                        goto error;
+               }
        }
 
        return 0;
 
 error:
-       x86_vector_free_irqs(domain, virq, i + 1);
+       x86_vector_free_irqs(domain, virq, i);
        return err;
 }
 
index bcb75dc97d44075d2eecb3137b91f934072352b0..ea831c85819583c3dd0b0a48c2a31518eaa67b44 100644 (file)
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
                set_cpu_cap(c, X86_FEATURE_K8);
 
        if (cpu_has(c, X86_FEATURE_XMM2)) {
-               /* MFENCE stops RDTSC speculation */
-               set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               unsigned long long val;
+               int ret;
+
+               /*
+                * A serializing LFENCE has less overhead than MFENCE, so
+                * use it for execution serialization.  On families which
+                * don't have that MSR, LFENCE is already serializing.
+                * msr_set_bit() uses the safe accessors, too, even if the MSR
+                * is not present.
+                */
+               msr_set_bit(MSR_F10H_DECFG,
+                           MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+               /*
+                * Verify that the MSR write was successful (could be running
+                * under a hypervisor) and only then assume that LFENCE is
+                * serializing.
+                */
+               ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+               if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+                       /* A serializing LFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+               } else {
+                       /* MFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               }
        }
 
        /*
index ba0b2424c9b050100ac6eeef97c4aef45181d455..390b3dc3d43827a88537e3208462755b565cb838 100644 (file)
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -19,6 +23,9 @@
 #include <asm/alternative.h>
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
+#include <asm/intel-family.h>
+
+static void __init spectre_v2_select_mitigation(void);
 
 void __init check_bugs(void)
 {
@@ -29,6 +36,9 @@ void __init check_bugs(void)
                print_cpu_info(&boot_cpu_data);
        }
 
+       /* Select the proper spectre mitigation before patching alternatives */
+       spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
        /*
         * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +70,214 @@ void __init check_bugs(void)
                set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+       SPECTRE_V2_CMD_NONE,
+       SPECTRE_V2_CMD_AUTO,
+       SPECTRE_V2_CMD_FORCE,
+       SPECTRE_V2_CMD_RETPOLINE,
+       SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+       SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+       [SPECTRE_V2_NONE]                       = "Vulnerable",
+       [SPECTRE_V2_RETPOLINE_MINIMAL]          = "Vulnerable: Minimal generic ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_MINIMAL_AMD]      = "Vulnerable: Minimal AMD ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_GENERIC]          = "Mitigation: Full generic retpoline",
+       [SPECTRE_V2_RETPOLINE_AMD]              = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+       if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+       return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+       int len = strlen(opt);
+
+       return len == arglen && !strncmp(arg, opt, len);
+}
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+       char arg[20];
+       int ret;
+
+       ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+                                 sizeof(arg));
+       if (ret > 0)  {
+               if (match_option(arg, ret, "off")) {
+                       goto disable;
+               } else if (match_option(arg, ret, "on")) {
+                       spec2_print_if_secure("force enabled on command line.");
+                       return SPECTRE_V2_CMD_FORCE;
+               } else if (match_option(arg, ret, "retpoline")) {
+                       spec2_print_if_insecure("retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE;
+               } else if (match_option(arg, ret, "retpoline,amd")) {
+                       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+                               pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+                               return SPECTRE_V2_CMD_AUTO;
+                       }
+                       spec2_print_if_insecure("AMD retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_AMD;
+               } else if (match_option(arg, ret, "retpoline,generic")) {
+                       spec2_print_if_insecure("generic retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+               } else if (match_option(arg, ret, "auto")) {
+                       return SPECTRE_V2_CMD_AUTO;
+               }
+       }
+
+       if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+               return SPECTRE_V2_CMD_AUTO;
+disable:
+       spec2_print_if_insecure("disabled on command line.");
+       return SPECTRE_V2_CMD_NONE;
+}
+
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+           boot_cpu_data.x86 == 6) {
+               switch (boot_cpu_data.x86_model) {
+               case INTEL_FAM6_SKYLAKE_MOBILE:
+               case INTEL_FAM6_SKYLAKE_DESKTOP:
+               case INTEL_FAM6_SKYLAKE_X:
+               case INTEL_FAM6_KABYLAKE_MOBILE:
+               case INTEL_FAM6_KABYLAKE_DESKTOP:
+                       return true;
+               }
+       }
+       return false;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+       enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+       /*
+        * If the CPU is not affected and the command line mode is NONE or AUTO
+        * then nothing to do.
+        */
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+           (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+               return;
+
+       switch (cmd) {
+       case SPECTRE_V2_CMD_NONE:
+               return;
+
+       case SPECTRE_V2_CMD_FORCE:
+               /* FALLTRHU */
+       case SPECTRE_V2_CMD_AUTO:
+               goto retpoline_auto;
+
+       case SPECTRE_V2_CMD_RETPOLINE_AMD:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_amd;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_generic;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_auto;
+               break;
+       }
+       pr_err("kernel not compiled with retpoline; no mitigation available!");
+       return;
+
+retpoline_auto:
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+       retpoline_amd:
+               if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       goto retpoline_generic;
+               }
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       } else {
+       retpoline_generic:
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       }
+
+       spectre_v2_enabled = mode;
+       pr_info("%s\n", spectre_v2_strings[mode]);
+
+       /*
+        * If neither SMEP or KPTI are available, there is a risk of
+        * hitting userspace addresses in the RSB after a context switch
+        * from a shallow call stack to a deeper one. To prevent this fill
+        * the entire RSB, even when using IBRS.
+        *
+        * Skylake era CPUs have a separate issue with *underflow* of the
+        * RSB, when they will predict 'ret' targets from the generic BTB.
+        * The proper mitigation for this is IBRS. If IBRS is not supported
+        * or deactivated in favour of retpolines the RSB fill on context
+        * switch is required.
+        */
+       if ((!boot_cpu_has(X86_FEATURE_PTI) &&
+            !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
+               setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+               pr_info("Filling RSB on context switch\n");
+       }
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               return sprintf(buf, "Not affected\n");
+       if (boot_cpu_has(X86_FEATURE_PTI))
+               return sprintf(buf, "Mitigation: PTI\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+               return sprintf(buf, "Not affected\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               return sprintf(buf, "Not affected\n");
+
+       return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
index 39d7ea865207d102d2f03fc9ca779b2bdbb03c61..ef29ad001991d6acdd5b59cd707d85f9ff99f9ea 100644 (file)
@@ -926,6 +926,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
        if (c->x86_vendor != X86_VENDOR_AMD)
                setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
 
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
        fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
index 88dcf8479013569e93a58c278badaf690c29a27b..99442370de40d7b2fedcd3777d7865bf929877ff 100644 (file)
@@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
                 */
                if (static_branch_unlikely(&rdt_mon_enable_key))
                        rmdir_mondata_subdir_allrdtgrp(r, d->id);
-               kfree(d->ctrl_val);
-               kfree(d->rmid_busy_llc);
-               kfree(d->mbm_total);
-               kfree(d->mbm_local);
                list_del(&d->list);
                if (is_mbm_enabled())
                        cancel_delayed_work(&d->mbm_over);
@@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
                        cancel_delayed_work(&d->cqm_limbo);
                }
 
+               kfree(d->ctrl_val);
+               kfree(d->rmid_busy_llc);
+               kfree(d->mbm_total);
+               kfree(d->mbm_local);
                kfree(d);
                return;
        }
index 8ccdca6d3f9e9b876ee27f021ed8c021b1168220..d9e460fc7a3b309327c8f2899cc54a7be2b9dc6b 100644 (file)
@@ -910,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-       if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-               pr_err_once("late loading on model 79 is disabled.\n");
+       /*
+        * Late loading on model 79 with microcode revision less than 0x0b000021
+        * may result in a system hang. This behavior is documented in item
+        * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+        */
+       if (c->x86 == 6 &&
+           c->x86_model == INTEL_FAM6_BROADWELL_X &&
+           c->x86_mask == 0x01 &&
+           c->microcode < 0x0b000021) {
+               pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+               pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
                return true;
        }
 
index 05459ad3db46e2139b7d97514899d398c321c541..d0e69769abfd39efea1d3b15fbf4f49b742b700f 100644 (file)
@@ -21,7 +21,6 @@ struct cpuid_bit {
 static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_APERFMPERF,       CPUID_ECX,  0, 0x00000006, 0 },
        { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
-       { X86_FEATURE_INTEL_PT,         CPUID_EBX, 25, 0x00000007, 0 },
        { X86_FEATURE_AVX512_4VNNIW,    CPUID_EDX,  2, 0x00000007, 0 },
        { X86_FEATURE_AVX512_4FMAPS,    CPUID_EDX,  3, 0x00000007, 0 },
        { X86_FEATURE_CAT_L3,           CPUID_EBX,  1, 0x00000010, 0 },
index b6c6468e10bc96625c25c4ae8d19b734af81366b..4c8440de33559942a98245cf123a5ed10a30e406 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/export.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CC_USING_FENTRY
 # define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
        movl    0x4(%ebp), %edx
        subl    $MCOUNT_INSN_SIZE, %eax
 
-       call    *ftrace_trace_function
+       movl    ftrace_trace_function, %ecx
+       CALL_NOSPEC %ecx
 
        popl    %edx
        popl    %ecx
@@ -241,5 +243,5 @@ return_to_handler:
        movl    %eax, %ecx
        popl    %edx
        popl    %eax
-       jmp     *%ecx
+       JMP_NOSPEC %ecx
 #endif
index c832291d948a6b4fd487a3e42fab8b8f7b9dd244..7cb8ba08beb997ef66724e2db5e66021c5ce32ee 100644 (file)
@@ -7,7 +7,7 @@
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
-
+#include <asm/nospec-branch.h>
 
        .code64
        .section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
         * ip and parent ip are used and the list function is called when
         * function tracing is enabled.
         */
-       call   *ftrace_trace_function
-
+       movq ftrace_trace_function, %r8
+       CALL_NOSPEC %r8
        restore_mcount_regs
 
        jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
        movq 8(%rsp), %rdx
        movq (%rsp), %rax
        addq $24, %rsp
-       jmp *%rdi
+       JMP_NOSPEC %rdi
 #endif
index 6a5d757b9cfdc7c9bf4b815c0c13197ecbd32719..7ba5d819ebe3b351ff4124ded8c598adfcb1a5f1 100644 (file)
@@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
        p = fixup_pointer(&phys_base, physaddr);
        *p += load_delta - sme_get_me_mask();
 
-       /* Encrypt the kernel (if SME is active) */
-       sme_encrypt_kernel();
+       /* Encrypt the kernel and related (if SME is active) */
+       sme_encrypt_kernel(bp);
 
        /*
         * Return the SME encryption mask (if SME is active) to be used as a
index d985cef3984ff030e3a83d50eaa9dc45f0a64a82..56d99be3706a23b600a50d2ae366b3a8fde251ed 100644 (file)
@@ -56,7 +56,7 @@ struct idt_data {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_idts[] = {
+static const __initconst struct idt_data early_idts[] = {
        INTG(X86_TRAP_DB,               debug),
        SYSG(X86_TRAP_BP,               int3),
 #ifdef CONFIG_X86_32
@@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = {
  * the traps which use them are reinitialized with IST after cpu_init() has
  * set up TSS.
  */
-static const __initdata struct idt_data def_idts[] = {
+static const __initconst struct idt_data def_idts[] = {
        INTG(X86_TRAP_DE,               divide_error),
        INTG(X86_TRAP_NMI,              nmi),
        INTG(X86_TRAP_BR,               bounds),
@@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = {
 /*
  * The APIC and SMP idt entries
  */
-static const __initdata struct idt_data apic_idts[] = {
+static const __initconst struct idt_data apic_idts[] = {
 #ifdef CONFIG_SMP
        INTG(RESCHEDULE_VECTOR,         reschedule_interrupt),
        INTG(CALL_FUNCTION_VECTOR,      call_function_interrupt),
@@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = {
  * Early traps running on the DEFAULT_STACK because the other interrupt
  * stacks work only after cpu_init().
  */
-static const __initdata struct idt_data early_pf_idts[] = {
+static const __initconst struct idt_data early_pf_idts[] = {
        INTG(X86_TRAP_PF,               page_fault),
 };
 
@@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = {
  * Override for the debug_idt. Same as the default, but with interrupt
  * stack set to DEFAULT_STACK (0). Required for NMI trap handling.
  */
-static const __initdata struct idt_data dbg_idts[] = {
+static const __initconst struct idt_data dbg_idts[] = {
        INTG(X86_TRAP_DB,       debug),
        INTG(X86_TRAP_BP,       int3),
 };
@@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
  * The exceptions which use Interrupt stacks. They are setup after
  * cpu_init() when the TSS has been initialized.
  */
-static const __initdata struct idt_data ist_idts[] = {
+static const __initconst struct idt_data ist_idts[] = {
        ISTG(X86_TRAP_DB,       debug,          DEBUG_STACK),
        ISTG(X86_TRAP_NMI,      nmi,            NMI_STACK),
        SISTG(X86_TRAP_BP,      int3,           DEBUG_STACK),
index a83b3346a0e104833793687aea68f7f216cf0374..c1bdbd3d3232cb83d07bfa4ce604ae0b620c796a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 
 #include <asm/apic.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 static void call_on_stack(void *func, void *stack)
 {
        asm volatile("xchgl     %%ebx,%%esp     \n"
-                    "call      *%%edi          \n"
+                    CALL_NOSPEC
                     "movl      %%ebx,%%esp     \n"
                     : "=b" (stack)
                     : "0" (stack),
-                      "D"(func)
+                      [thunk_target] "D"(func)
                     : "memory", "cc", "edx", "ecx", "eax");
 }
 
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
                call_on_stack(print_stack_overflow, isp);
 
     &nbs