Merge branches 'arm/renesas', 'arm/omap', 'arm/exynos', 'x86/amd', 'x86/vt-d' and...
author Joerg Roedel <jroedel@suse.de>
Wed, 17 Jan 2018 14:29:14 +0000 (15:29 +0100)
committer Joerg Roedel <jroedel@suse.de>
Wed, 17 Jan 2018 14:29:14 +0000 (15:29 +0100)
858 files changed:
.mailmap
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/admin-guide/kernel-parameters.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/thunderbolt.rst
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
Documentation/devicetree/bindings/sound/da7218.txt
Documentation/devicetree/bindings/sound/da7219.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/filesystems/nilfs2.txt
Documentation/gpu/i915.rst
Documentation/kbuild/kconfig-language.txt
Documentation/networking/index.rst
Documentation/networking/msg_zerocopy.rst
Documentation/usb/gadget-testing.txt
Documentation/x86/pti.txt [new file with mode: 0644]
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/arc/boot/dts/axc003.dtsi
arch/arc/boot/dts/axc003_idu.dtsi
arch/arc/boot/dts/hsdk.dts
arch/arc/configs/hsdk_defconfig
arch/arc/include/asm/uaccess.h
arch/arc/kernel/setup.c
arch/arc/kernel/stacktrace.c
arch/arc/kernel/traps.c
arch/arc/kernel/troubleshoot.c
arch/arc/plat-axs10x/axs10x.c
arch/arc/plat-hsdk/platform.c
arch/arm/boot/dts/aspeed-g4.dtsi
arch/arm/boot/dts/at91-tse850-3.dts
arch/arm/boot/dts/da850-lego-ev3.dts
arch/arm/boot/dts/exynos5800-peach-pi.dts
arch/arm/boot/dts/ls1021a-qds.dts
arch/arm/boot/dts/ls1021a-twr.dts
arch/arm/boot/dts/rk3066a-marsboard.dts
arch/arm/boot/dts/rk3288.dtsi
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun5i-a10s.dtsi
arch/arm/boot/dts/sun6i-a31.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
arch/arm/boot/dts/tango4-common.dtsi
arch/arm/kernel/traps.c
arch/arm/lib/csumpartialcopyuser.S
arch/arm/mach-davinci/dm365.c
arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-pine64.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi
arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus2.dts
arch/arm64/boot/dts/renesas/salvator-common.dtsi
arch/arm64/boot/dts/renesas/ulcb.dtsi
arch/arm64/boot/dts/rockchip/rk3328-rock64.dts
arch/arm64/boot/dts/rockchip/rk3328.dtsi
arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi
arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
arch/arm64/kvm/hyp/debug-sr.c
arch/ia64/kernel/time.c
arch/m32r/kernel/traps.c
arch/mips/kernel/cps-vec.S
arch/mips/kernel/process.c
arch/mips/kernel/ptrace.c
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/ldcw.h
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/drivers.c
arch/parisc/kernel/entry.S
arch/parisc/kernel/hpmc.S
arch/parisc/kernel/pacache.S
arch/parisc/kernel/process.c
arch/parisc/kernel/unwind.c
arch/parisc/lib/delay.c
arch/parisc/mm/init.c
arch/powerpc/include/asm/exception-64e.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/feature-fixups.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/plpar_wrappers.h
arch/powerpc/include/asm/setup.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/vmlinux.lds.S
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/book3s_rmhandlers.S
arch/powerpc/kvm/book3s_segment.S
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/lib/feature-fixups.c
arch/powerpc/mm/fault.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/pseries/dlpar.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/sysdev/fsl_msi.c
arch/riscv/configs/defconfig
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/io.h
arch/riscv/include/asm/irqflags.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/ptrace.h
arch/riscv/include/asm/tlbflush.h
arch/riscv/include/asm/uaccess.h
arch/riscv/include/asm/unistd.h
arch/riscv/include/asm/vdso-syscalls.h [deleted file]
arch/riscv/include/uapi/asm/syscalls.h [new file with mode: 0644]
arch/riscv/kernel/entry.S
arch/riscv/kernel/process.c
arch/riscv/kernel/syscall_table.c
arch/riscv/kernel/vdso/flush_icache.S
arch/riscv/mm/fault.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/s390/lib/uaccess.c
arch/s390/net/bpf_jit_comp.c
arch/s390/pci/pci_dma.c
arch/s390/pci/pci_insn.c
arch/sh/boards/mach-se/770x/setup.c
arch/sh/include/mach-se/mach/se.h
arch/sparc/lib/hweight.S
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/net/bpf_jit_comp_64.c
arch/um/include/asm/mmu_context.h
arch/um/kernel/trap.c
arch/unicore32/include/asm/mmu_context.h
arch/unicore32/kernel/traps.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/boot/compressed/pagetable.c
arch/x86/boot/genimage.sh
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/camellia-aesni-avx-asm_64.S
arch/x86/crypto/camellia-aesni-avx2-asm_64.S
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/events/intel/bts.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/asm.h
arch/x86/include/asm/cpu_entry_area.h [new file with mode: 0644]
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/espfix.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/intel_ds.h [new file with mode: 0644]
arch/x86/include/asm/invpcid.h [new file with mode: 0644]
arch/x86/include/asm/irqdomain.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h [new file with mode: 0644]
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/pgalloc.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32_types.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/pti.h [new file with mode: 0644]
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/trace/irq_vectors.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unwind.h
arch/x86/include/asm/vsyscall.h
arch/x86/include/asm/xen/hypercall.h
arch/x86/include/uapi/asm/processor-flags.h
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_flat_64.c
arch/x86/kernel/apic/apic_noop.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/msi.c
arch/x86/kernel/apic/probe_32.c
arch/x86/kernel/apic/vector.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/doublefault.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/ftrace_32.S
arch/x86/kernel/ftrace_64.S
arch/x86/kernel/head_64.S
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/ldt.c
arch/x86/kernel/machine_kexec_32.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/tboot.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/Makefile
arch/x86/lib/checksum_32.S
arch/x86/lib/delay.c
arch/x86/lib/retpoline.S [new file with mode: 0644]
arch/x86/mm/Makefile
arch/x86/mm/cpu_entry_area.c [new file with mode: 0644]
arch/x86/mm/debug_pagetables.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/kaslr.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/pti.c [new file with mode: 0644]
arch/x86/mm/tlb.c
arch/x86/pci/common.c
arch/x86/pci/fixup.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/efi/quirks.c
arch/x86/platform/intel-mid/device_libs/platform_bt.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/platform/uv/uv_irq.c
arch/x86/power/cpu.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
arch/x86/xen/xen-ops.h
block/bio.c
block/blk-core.c
block/blk-map.c
block/blk-mq.c
block/blk-throttle.c
block/blk.h
block/bounce.c
block/kyber-iosched.c
crypto/af_alg.c
crypto/algapi.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/chacha20poly1305.c
crypto/mcryptd.c
crypto/pcrypt.c
crypto/skcipher.c
drivers/acpi/apei/erst.c
drivers/acpi/cppc_acpi.c
drivers/acpi/nfit/core.c
drivers/android/binder.c
drivers/base/Kconfig
drivers/base/cacheinfo.c
drivers/base/cpu.c
drivers/block/loop.c
drivers/block/null_blk.c
drivers/block/rbd.c
drivers/bus/sunxi-rsb.c
drivers/clk/clk.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/crypto/chelsio/Kconfig
drivers/crypto/inside-secure/safexcel.c
drivers/crypto/inside-secure/safexcel_cipher.c
drivers/crypto/inside-secure/safexcel_hash.c
drivers/crypto/n2_core.c
drivers/firmware/efi/capsule-loader.c
drivers/gpio/gpio-bcm-kona.c
drivers/gpio/gpio-brcmstb.c
drivers/gpio/gpio-reg.c
drivers/gpio/gpio-tegra.c
drivers/gpio/gpio-xgene-sb.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-devprop.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.c
drivers/gpio/gpiolib.h
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_cm.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
drivers/gpu/drm/armada/armada_crtc.c
drivers/gpu/drm/armada/armada_crtc.h
drivers/gpu/drm/armada/armada_overlay.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_plane.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_psr.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_drv.h
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_mem.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nouveau_vmm.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/tegra/sor.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/gpu/drm/vc4/vc4_v3d.c
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
drivers/hid/hid-core.c
drivers/hid/hid-cp2112.c
drivers/hid/hid-holtekff.c
drivers/hv/vmbus_drv.c
drivers/hwmon/hwmon.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/security.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/input/joystick/analog.c
drivers/input/misc/ims-pcu.c
drivers/input/misc/xen-kbdfront.c
drivers/input/mouse/elantech.c
drivers/input/touchscreen/elants_i2c.c
drivers/input/touchscreen/hideep.c
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_types.h
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel-svm.c
drivers/iommu/intel_irq_remapping.c
drivers/iommu/iommu.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/msm_iommu.c
drivers/iommu/of_iommu.c
drivers/iommu/omap-iommu-debug.c
drivers/iommu/qcom_iommu.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-renesas-intc-irqpin.c
drivers/leds/led-core.c
drivers/mfd/arizona-irq.c
drivers/mfd/cros_ec_spi.c
drivers/mfd/rtsx_pcr.c
drivers/mfd/twl4030-audio.c
drivers/mfd/twl6040.c
drivers/mmc/host/renesas_sdhi_core.c
drivers/mmc/host/s3cmci.c
drivers/mtd/mtdcore.c
drivers/mtd/nand/brcmnand/brcmnand.c
drivers/mtd/nand/gpio.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/mtd/nand/pxa3xx_nand.c
drivers/mux/core.c
drivers/net/can/flexcan.c
drivers/net/can/usb/ems_usb.c
drivers/net/can/usb/gs_usb.c
drivers/net/can/vxcan.c
drivers/net/dsa/b53/b53_common.c
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/gianfar_ptp.c
drivers/net/ethernet/intel/e1000/e1000.h
drivers/net/ethernet/intel/e1000/e1000_main.c
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/geneve.c
drivers/net/macvlan.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-sun4i.c
drivers/net/phy/mdio-xgene.c
drivers/net/phy/micrel.c
drivers/net/phy/phylink.c
drivers/net/phy/sfp-bus.c
drivers/net/usb/qmi_wwan.c
drivers/net/vxlan.c
drivers/net/wireless/ath/wcn36xx/main.c
drivers/net/wireless/ath/wcn36xx/pmc.c
drivers/net/wireless/intel/iwlwifi/pcie/internal.h
drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/xen-netfront.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/pfn_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/fcloop.c
drivers/nvmem/meson-mx-efuse.c
drivers/of/of_mdio.c
drivers/parisc/dino.c
drivers/parisc/eisa_eeprom.c
drivers/parisc/lba_pci.c
drivers/pci/host/pci-hyperv.c
drivers/pci/pci-driver.c
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/renesas/Kconfig
drivers/phy/rockchip/phy-rockchip-typec.c
drivers/phy/tegra/xusb.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/pinctrl-single.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/platform/x86/wmi.c
drivers/s390/block/dasd_3990_erp.c
drivers/s390/char/Makefile
drivers/s390/net/qeth_core_main.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/linit.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/storvsc_drv.c
drivers/spi/spi-armada-3700.c
drivers/spi/spi-atmel.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sun4i.c
drivers/spi/spi-xilinx.c
drivers/staging/android/ashmem.c
drivers/staging/android/ion/Kconfig
drivers/staging/android/ion/ion.c
drivers/staging/android/ion/ion_cma_heap.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
drivers/target/target_core_pscsi.c
drivers/thunderbolt/nhi.c
drivers/tty/n_tty.c
drivers/usb/chipidea/ci_hdrc_msm.c
drivers/usb/core/config.c
drivers/usb/core/quirks.c
drivers/usb/gadget/udc/core.c
drivers/usb/host/xhci-debugfs.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci.c
drivers/usb/misc/usb3503.c
drivers/usb/mon/mon_bin.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/option.c
drivers/usb/serial/qcserial.c
drivers/usb/storage/unusual_uas.h
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/stub_main.c
drivers/usb/usbip/stub_rx.c
drivers/usb/usbip/stub_tx.c
drivers/usb/usbip/usbip_common.c
drivers/usb/usbip/vhci_hcd.c
drivers/usb/usbip/vhci_rx.c
drivers/usb/usbip/vhci_tx.c
drivers/usb/usbip/vudc_rx.c
drivers/usb/usbip/vudc_tx.c
drivers/xen/balloon.c
drivers/xen/gntdev.c
drivers/xen/pvcalls-front.c
fs/afs/dir.c
fs/afs/inode.c
fs/afs/rxrpc.c
fs/afs/write.c
fs/btrfs/delayed-inode.c
fs/btrfs/volumes.c
fs/exec.c
fs/super.c
fs/userfaultfd.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_iomap.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/if_alg.h
include/crypto/mcryptd.h
include/kvm/arm_arch_timer.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/completion.h
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/crash_core.h
include/linux/efi.h
include/linux/fscache.h
include/linux/gpio/driver.h
include/linux/intel-iommu.h
include/linux/ipv6.h
include/linux/irq.h
include/linux/irqdesc.h
include/linux/irqdomain.h
include/linux/irqflags.h
include/linux/lockdep.h
include/linux/mfd/rtsx_pci.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/of_iommu.h
include/linux/pti.h [new file with mode: 0644]
include/linux/sh_eth.h
include/linux/spi/spi.h
include/linux/tick.h
include/linux/timer.h
include/net/cfg80211.h
include/net/pkt_cls.h
include/net/sctp/structs.h
include/net/sock.h
include/net/vxlan.h
include/net/xfrm.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/trace/events/tcp.h
include/uapi/linux/if_ether.h
include/uapi/linux/libc-compat.h
include/uapi/linux/netfilter/nf_conntrack_common.h
include/xen/balloon.h
init/Kconfig
init/main.c
kernel/acct.c
kernel/bpf/arraymap.c
kernel/bpf/core.c
kernel/bpf/inode.c
kernel/bpf/sockmap.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup-v1.c
kernel/cgroup/cgroup.c
kernel/cpu.c
kernel/crash_core.c
kernel/exit.c
kernel/fork.c
kernel/irq/debug.h
kernel/irq/debugfs.c
kernel/irq/generic-chip.c
kernel/irq/internals.h
kernel/irq/irqdomain.c
kernel/irq/msi.c
kernel/pid.c
kernel/sched/completion.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/membarrier.c
kernel/time/Kconfig
kernel/time/tick-sched.c
kernel/time/timer.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
lib/kobject_uevent.c
lib/mpi/longlong.h
lib/test_bpf.c
lib/timerqueue.c
mm/backing-dev.c
mm/debug.c
mm/kmemleak.c
mm/mprotect.c
mm/page_alloc.c
mm/sparse.c
mm/vmscan.c
mm/zsmalloc.c
net/8021q/vlan.c
net/bluetooth/l2cap_core.c
net/bridge/br_netlink.c
net/caif/caif_dev.c
net/caif/caif_usb.c
net/caif/cfcnfg.c
net/caif/cfctrl.c
net/core/dev.c
net/core/ethtool.c
net/core/filter.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock_diag.c
net/core/sysctl_net_core.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/ip_gre.c
net/ipv4/raw.c
net/ipv4/xfrm4_input.c
net/ipv6/af_inet6.c
net/ipv6/exthdrs.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipv6_sockglue.c
net/ipv6/route.c
net/ipv6/xfrm6_input.c
net/mac80211/rx.c
net/netfilter/nf_tables_api.c
net/netfilter/xt_bpf.c
net/openvswitch/flow.c
net/rds/rdma.c
net/rds/send.c
net/sched/act_gact.c
net/sched/act_mirred.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/sch_generic.c
net/sctp/debug.c
net/sctp/input.c
net/sctp/socket.c
net/sctp/stream.c
net/sctp/transport.c
net/sctp/ulpqueue.c
net/socket.c
net/strparser/strparser.c
net/tipc/bearer.c
net/tipc/group.c
net/tipc/monitor.c
net/tipc/socket.c
net/wireless/Makefile
net/wireless/certs/sforshee.hex [new file with mode: 0644]
net/wireless/certs/sforshee.x509 [deleted file]
net/wireless/nl80211.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
scripts/genksyms/.gitignore
scripts/kconfig/expr.c
security/Kconfig
security/apparmor/domain.c
security/apparmor/include/perms.h
security/apparmor/ipc.c
security/apparmor/mount.c
security/commoncap.c
sound/core/oss/pcm_oss.c
sound/core/oss/pcm_plugin.c
sound/core/pcm_lib.c
sound/core/pcm_native.c
sound/core/rawmidi.c
sound/drivers/aloop.c
sound/hda/hdac_i915.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/atmel/Kconfig
sound/soc/codecs/da7218.c
sound/soc/codecs/msm8916-wcd-analog.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/rt5514-spi.c
sound/soc/codecs/rt5514.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5663.c
sound/soc/codecs/rt5663.h
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/twl4030.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_asrc.h
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/boards/kbl_rt5663_max98927.c
sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
sound/soc/intel/skylake/skl-nhlt.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/rockchip/rockchip_spdif.c
sound/soc/sh/rcar/adg.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/dma.c
sound/soc/sh/rcar/ssi.c
sound/soc/sh/rcar/ssiu.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/arch/s390/include/uapi/asm/bpf_perf_event.h
tools/arch/s390/include/uapi/asm/perf_regs.h [new file with mode: 0644]
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/objtool/Makefile
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-orc.c
tools/objtool/check.c
tools/objtool/check.h
tools/objtool/orc_gen.c
tools/perf/Makefile.config
tools/perf/arch/s390/include/perf_regs.h
tools/perf/check-headers.sh
tools/perf/jvmti/jvmti_agent.c
tools/perf/jvmti/jvmti_agent.h
tools/perf/jvmti/libjvmti.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_align.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/config
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/ldt_gdt.c
tools/testing/selftests/x86/test_vsyscall.c [new file with mode: 0644]
tools/usb/usbip/src/utils.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmio.c
virt/kvm/arm/mmu.c

index 1469ff0d3f4d55dde07c676e4c4fc29f432d20f3..e18cab73e209a7b3057cfd672356dab6d9846483 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -107,6 +107,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Mark Brown <broonie@sirena.org.uk>
+Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
index d6d862db3b5d65fe09c26783298bba21ceec5558..bfd29bc8d37af1bbc4913deee9d941b1692bedda 100644 (file)
@@ -375,3 +375,19 @@ Contact:   Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Information about CPU heterogeneity.
 
                cpu_capacity: capacity of cpu#.
+
+What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/spectre_v1
+               /sys/devices/system/cpu/vulnerabilities/spectre_v2
+Date:          January 2018
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:   Information about CPU vulnerabilities
+
+               The files are named after the code names of CPU
+               vulnerabilities. The output of those files reflects the
+               state of the CPUs in the system. Possible output values:
+
+               "Not affected"    CPU is not affected by the vulnerability
+               "Vulnerable"      CPU is affected and no mitigation in effect
+               "Mitigation: $M"  CPU is affected and mitigation $M is in effect
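As an illustration only (this sketch is not part of the patch), userspace
could consume the new ABI by reading each documented file; error handling
here is deliberately terse:

	#include <stdio.h>

	int main(void)
	{
		static const char *files[] = {
			"meltdown", "spectre_v1", "spectre_v2"
		};
		char path[128], line[256];

		for (unsigned int i = 0; i < 3; i++) {
			snprintf(path, sizeof(path),
				 "/sys/devices/system/cpu/vulnerabilities/%s",
				 files[i]);
			FILE *f = fopen(path, "r");
			if (f && fgets(line, sizeof(line), f))
				printf("%s: %s", files[i], line);
			if (f)
				fclose(f);
		}
		return 0;
	}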
index b2598cc9834c3cbacfd9b3b1d885ed69c4c4c133..7242cbda15dd3bf8c4debea387ff2c8e376aeb60 100644 (file)
@@ -109,6 +109,7 @@ parameter is applicable::
        IPV6    IPv6 support is enabled.
        ISAPNP  ISA PnP code is enabled.
        ISDN    Appropriate ISDN support is enabled.
+       ISOL    CPU Isolation is enabled.
        JOY     Appropriate joystick support is enabled.
        KGDB    Kernel debugger support is enabled.
        KVM     Kernel Virtual Machine support is enabled.
index 6571fbfdb2a1527c25b3a01e9c4228c84adce639..46b26bfee27ba81267703a1ceac7c0e082f1e5ec 100644 (file)
                        not play well with APC CPU idle - disable it if you have
                        APC and your system crashes randomly.
 
-       apic=           [APIC,X86-32] Advanced Programmable Interrupt Controller
+       apic=           [APIC,X86] Advanced Programmable Interrupt Controller
                        Change the output verbosity whilst booting
                        Format: { quiet (default) | verbose | debug }
                        Change the amount of debugging information output
                        when initialising the APIC and IO-APIC components.
+                       For X86-32, this can also be used to specify an APIC
+                       driver name.
+                       Format: apic=driver_name
+                       Examples: apic=bigsmp
 
        apic_extnmi=    [APIC,X86] External NMI delivery setting
                        Format: { bsp (default) | all | none }
                        It will be ignored when crashkernel=X,high is not used
                        or memory reserved is below 4G.
 
-       crossrelease_fullstack
-                       [KNL] Allow to record full stack trace in cross-release
-
        cryptomgr.notests
                         [KNL] Disable crypto self-tests
 
        isapnp=         [ISAPNP]
                        Format: <RDP>,<reset>,<pci_scan>,<verbosity>
 
-       isolcpus=       [KNL,SMP] Isolate a given set of CPUs from disturbance.
+       isolcpus=       [KNL,SMP,ISOL] Isolate a given set of CPUs from disturbance.
                        [Deprecated - use cpusets instead]
                        Format: [flag-list,]<cpu-list>
 
        nosmt           [KNL,S390] Disable symmetric multithreading (SMT).
                        Equivalent to smt=1.
 
+       nospectre_v2    [X86] Disable all mitigations for the Spectre variant 2
+                       (indirect branch prediction) vulnerability. System may
+                       allow data leaks with this option, which is equivalent
+                       to spectre_v2=off.
+
        noxsave         [BUGS=X86] Disables x86 extended register state save
                        and restore using xsave. The kernel will fallback to
                        enabling legacy floating-point and sse state.
                        Valid arguments: on, off
                        Default: on
 
-       nohz_full=      [KNL,BOOT]
+       nohz_full=      [KNL,BOOT,SMP,ISOL]
                        The argument is a cpu list, as described above.
                        In kernels built with CONFIG_NO_HZ_FULL=y, set
                        the specified list of CPUs whose tick will be stopped
                pcie_scan_all   Scan all possible PCIe devices.  Otherwise we
                                only look for one device below a PCIe downstream
                                port.
+               big_root_window Try to add a big 64bit memory window to the PCIe
+                               root complex on AMD CPUs. Some GFX hardware
+                               can resize a BAR to allow access to all VRAM.
+                               Adding the window is slightly risky (it may
+                               conflict with unreported devices), so this
+                               taints the kernel.
 
        pcie_aspm=      [PCIE] Forcibly enable or disable PCIe Active State Power
                        Management.
        pt.             [PARIDE]
                        See Documentation/blockdev/paride.txt.
 
+       pti=            [X86_64] Control Page Table Isolation of user and
+                       kernel address spaces.  Disabling this feature
+                       removes hardening, but improves performance of
+                       system calls and interrupts.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable to issues that PTI mitigates
+
+                       Not specifying this option is equivalent to pti=auto.
+
+       nopti           [X86_64]
+                       Equivalent to pti=off
+
        pty.legacy_count=
                        [KNL] Number of legacy pty's. Overwrites compiled-in
                        default number.
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/laptops/sonypi.txt
 
+       spectre_v2=     [X86] Control mitigation of Spectre variant 2
+                       (indirect branch speculation) vulnerability.
+
+                       on   - unconditionally enable
+                       off  - unconditionally disable
+                       auto - kernel detects whether your CPU model is
+                              vulnerable
+
+                       Selecting 'on' will, and 'auto' may, choose a
+                       mitigation method at run time according to the
+                       CPU, the available microcode, the setting of the
+                       CONFIG_RETPOLINE configuration option, and the
+                       compiler with which the kernel was built.
+
+                       Specific mitigations can also be selected manually:
+
+                       retpoline         - replace indirect branches
+                       retpoline,generic - Google's original retpoline
+                       retpoline,amd     - AMD-specific minimal thunk
+
+                       Not specifying this option is equivalent to
+                       spectre_v2=auto.
+
        spia_io_base=   [HW,MTD]
        spia_fio_base=
        spia_pedr=
index de50a8561774249351515662404e2a1f8328aba6..9b55952039a692d8119ce62502af9a4dd071b8e6 100644 (file)
@@ -230,7 +230,7 @@ If supported by your machine this will be exposed by the WMI bus with
 a sysfs attribute called "force_power".
 
 For example the intel-wmi-thunderbolt driver exposes this attribute in:
-  /sys/devices/platform/PNP0C14:00/wmi_bus/wmi_bus-PNP0C14:00/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
+  /sys/bus/wmi/devices/86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power
 
   To force the power to on, write 1 to this attribute file.
   To disable force power, write 0 to this attribute file.
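As a non-authoritative illustration (not part of the patch), the same write
could be done from C; the WMI GUID path is the one quoted above and root
privileges are assumed:

	#include <stdio.h>

	int main(void)
	{
		/* Write "1" to force power on; write "0" to disable. */
		FILE *f = fopen("/sys/bus/wmi/devices/"
				"86CCFD48-205E-4A77-9C48-2021CBEDE341/force_power",
				"w");

		if (!f)
			return 1;
		fputs("1", f);
		return fclose(f) ? 1 : 0;
	}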
index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644 (file)
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
index 5ca5a709b6aa1989901eff6b9239f0bc71d0d272..3ab9dfef38d113523549e66cee20ad8db6e56842 100644 (file)
@@ -73,7 +73,7 @@ Example:
                compatible = "dlg,da7218";
                reg = <0x1a>;
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
                wakeup-source;
 
                VDD-supply = <&reg_audio>;
index cf61681826b675ad984dc17f6c1dbcc2e1b6fc35..5b54d2d045c355808bf0f58afba9ec7539ff4472 100644 (file)
@@ -77,7 +77,7 @@ Example:
                reg = <0x1a>;
 
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
                VDD-supply = <&reg_audio>;
                VDDMIC-supply = <&reg_audio>;
index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644 (file)
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
        Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-               Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
index c0727dc36271e99eaf844c205fff6d901c3d057b..f2f3f8592a6f5e12bdcd2d56005deaf52030d0f7 100644 (file)
@@ -25,8 +25,8 @@ available from the following download page.  At least "mkfs.nilfs2",
 cleaner or garbage collector) are required.  Details on the tools are
 described in the man pages included in the package.
 
-Project web page:    http://nilfs.sourceforge.net/
-Download page:       http://nilfs.sourceforge.net/en/download.html
+Project web page:    https://nilfs.sourceforge.io/
+Download page:       https://nilfs.sourceforge.io/en/download.html
 List info:           http://vger.kernel.org/vger-lists.html#linux-nilfs
 
 Caveats
index 2e7ee0313c1cd6c0377d2b4a3f4948641f9a84a2..e94d3ac2bdd02db6711db2a5424b69c613c20a69 100644 (file)
@@ -341,10 +341,7 @@ GuC
 GuC-specific firmware loader
 ----------------------------
 
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
-   :doc: GuC-specific firmware loader
-
-.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c
    :internal:
 
 GuC-based command submission
index 262722d8867b2aa9f70f137529dd298e5e36af60..c4a293a03c337f5080d9cf682f64428fc74a1d79 100644 (file)
@@ -200,10 +200,14 @@ module state. Dependency expressions have the following syntax:
 <expr> ::= <symbol>                             (1)
            <symbol> '=' <symbol>                (2)
            <symbol> '!=' <symbol>               (3)
-           '(' <expr> ')'                       (4)
-           '!' <expr>                           (5)
-           <expr> '&&' <expr>                   (6)
-           <expr> '||' <expr>                   (7)
+           <symbol1> '<' <symbol2>              (4)
+           <symbol1> '>' <symbol2>              (4)
+           <symbol1> '<=' <symbol2>             (4)
+           <symbol1> '>=' <symbol2>             (4)
+           '(' <expr> ')'                       (5)
+           '!' <expr>                           (6)
+           <expr> '&&' <expr>                   (7)
+           <expr> '||' <expr>                   (8)
 
 Expressions are listed in decreasing order of precedence. 
 
@@ -214,10 +218,13 @@ Expressions are listed in decreasing order of precedence.
     otherwise 'n'.
 (3) If the values of both symbols are equal, it returns 'n',
     otherwise 'y'.
-(4) Returns the value of the expression. Used to override precedence.
-(5) Returns the result of (2-/expr/).
-(6) Returns the result of min(/expr/, /expr/).
-(7) Returns the result of max(/expr/, /expr/).
+(4) If the value of <symbol1> is respectively lower than, greater than,
+    lower-or-equal to, or greater-or-equal to the value of <symbol2>,
+    it returns 'y', otherwise 'n'.
+(5) Returns the value of the expression. Used to override precedence.
+(6) Returns the result of (2-/expr/).
+(7) Returns the result of min(/expr/, /expr/).
+(8) Returns the result of max(/expr/, /expr/).
 
 An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
 respectively for calculations). A menu entry becomes visible when its
index 66e62086624501fe76c170f77efc1cbc0844032f..7d4b15977d61201eb8b265293d3d95a6b042c7e6 100644 (file)
@@ -9,6 +9,7 @@ Contents:
    batman-adv
    kapi
    z8530book
+   msg_zerocopy
 
 .. only::  subproject
 
@@ -16,4 +17,3 @@ Contents:
    =======
 
    * :ref:`genindex`
-
index 77f6d7e25cfda65ac36e0c47bb2f8bdacc2fee2a..291a012649678035b5d0fdc306904093ceedf610 100644 (file)
@@ -72,6 +72,10 @@ this flag, a process must first signal intent by setting a socket option:
        if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
                error(1, errno, "setsockopt zerocopy");
 
+Setting the socket option only works when the socket is in its initial
+(TCP_CLOSED) state.  Trying to set the option for a socket returned by accept(),
+for example, will lead to an EBUSY error. In this case, the option should be
+set on the listening socket and it will be inherited by the accepted sockets.
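An illustrative sketch (not from the patch) of honoring that rule on the
server side; listen_zerocopy() is a made-up helper name, and SO_ZEROCOPY
requires kernel and libc headers from v4.14 or later:

	#include <errno.h>
	#include <error.h>
	#include <netinet/in.h>
	#include <sys/socket.h>

	int listen_zerocopy(int port)
	{
		int one = 1;
		int fd = socket(AF_INET, SOCK_STREAM, 0);
		struct sockaddr_in addr = {
			.sin_family = AF_INET,
			.sin_port   = htons(port),
		};

		if (fd == -1)
			error(1, errno, "socket");
		/* Set SO_ZEROCOPY while still in the initial state: accepted
		 * sockets inherit it; setting it after accept() gives EBUSY. */
		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)))
			error(1, errno, "setsockopt zerocopy");
		if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)))
			error(1, errno, "bind");
		if (listen(fd, 1))
			error(1, errno, "listen");
		return fd;
	}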
 
 Transmission
 ------------
index 441a4b9b666fbb2b2aace2cbc2f5ab542635d553..5908a21fddb6035cd25bb759bfd01ee405371163 100644 (file)
@@ -693,7 +693,7 @@ such specification consists of a number of lines with an interval value
 in each line. The rules stated above are best illustrated with an example:
 
 # mkdir functions/uvc.usb0/control/header/h
-# cd functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/
 # ln -s header/h class/fs
 # ln -s header/h class/ss
 # mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
new file mode 100644 (file)
index 0000000..d11eff6
--- /dev/null
@@ -0,0 +1,186 @@
+Overview
+========
+
+Page Table Isolation (pti, previously known as KAISER[1]) is a
+countermeasure against attacks on the shared user/kernel address
+space such as the "Meltdown" approach[2].
+
+To mitigate this class of attacks, we create an independent set of
+page tables for use only when running userspace applications.  When
+the kernel is entered via syscalls, interrupts or exceptions, the
+page tables are switched to the full "kernel" copy.  When the system
+switches back to user mode, the user copy is used again.
+
+The userspace page tables contain only a minimal amount of kernel
+data: only what is needed to enter/exit the kernel such as the
+entry/exit functions themselves and the interrupt descriptor table
+(IDT).  There are a few strictly unnecessary things that get mapped
+such as the first C function when entering an interrupt (see
+comments in pti.c).
+
+This approach helps to ensure that side-channel attacks leveraging
+the paging structures do not function when PTI is enabled.  It can be
+enabled by setting CONFIG_PAGE_TABLE_ISOLATION=y at compile time.
+Once enabled at compile-time, it can be disabled at boot with the
+'nopti' or 'pti=' kernel parameters (see kernel-parameters.txt).
+
+Page Table Management
+=====================
+
+When PTI is enabled, the kernel manages two sets of page tables.
+The first set is very similar to the single set which is present in
+kernels without PTI.  This includes a complete mapping of userspace
+that the kernel can use for things like copy_to_user().
+
+Although _complete_, the user portion of the kernel page tables is
+crippled by setting the NX bit in the top level.  This ensures
+that any missed kernel->user CR3 switch will immediately crash
+userspace upon executing its first instruction.
+
+The userspace page tables map only the kernel data needed to enter
+and exit the kernel.  This data is entirely contained in the 'struct
+cpu_entry_area' structure, which is placed in the fixmap; this gives
+each CPU's copy of the area a compile-time-fixed virtual address.
+
+For new userspace mappings, the kernel makes the entries in its
+page tables like normal.  The only difference is when the kernel
+makes entries in the top (PGD) level.  In addition to setting the
+entry in the main kernel PGD, a copy of the entry is made in the
+userspace page tables' PGD.
+
+This sharing at the PGD level also inherently shares all the lower
+layers of the page tables.  This leaves a single, shared set of
+userspace page tables to manage.  One PTE to lock, one set of
+accessed bits, dirty bits, etc...
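A conceptual sketch of that PGD-level mirroring, assuming 4-level paging;
none of these names are the kernel's real helpers (the kernel has its own
pgd_t and pti-specific functions), they only model the idea:

	#include <stdint.h>

	/* With PTI the PGD allocation is order-1: kernel PGD in the
	 * first 4k page, user PGD in the second. */
	typedef uint64_t pgd_t;
	#define PTRS_PER_PGD	512
	#define PGD_SHIFT	39

	static unsigned long pgd_index(unsigned long addr)
	{
		return (addr >> PGD_SHIFT) & (PTRS_PER_PGD - 1);
	}

	static pgd_t *user_pgd(pgd_t *kernel_pgd)
	{
		return kernel_pgd + PTRS_PER_PGD;	/* next 4k page */
	}

	static void set_user_pgd_entry(pgd_t *pgd, unsigned long addr,
				       pgd_t val)
	{
		pgd[pgd_index(addr)] = val;		/* kernel copy */
		user_pgd(pgd)[pgd_index(addr)] = val;	/* mirrored user copy */
	}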
+
+Overhead
+========
+
+Protection against side-channel attacks is important.  But,
+this protection comes at a cost:
+
+1. Increased Memory Use
+  a. Each process now needs an order-1 PGD instead of order-0.
+     (Consumes an additional 4k per process).
+  b. The 'cpu_entry_area' structure must be 2MB in size and 2MB
+     aligned so that it can be mapped by setting a single PMD
+     entry.  This consumes nearly 2MB of RAM once the kernel
+     is decompressed, but no space in the kernel image itself.
+
+2. Runtime Cost
+  a. CR3 manipulation to switch between the page table copies
+     must be done at interrupt, syscall, and exception entry
+     and exit (it can be skipped when the kernel is interrupted,
+     though.)  Moves to CR3 are on the order of a hundred
+     cycles, and are required at every entry and exit.
+  b. A "trampoline" must be used for SYSCALL entry.  This
+     trampoline depends on a smaller set of resources than the
+     non-PTI SYSCALL entry code, so requires mapping fewer
+     things into the userspace page tables.  The downside is
+     that stacks must be switched at entry time.
+  c. Global pages are disabled for all kernel structures not
+     mapped into both kernel and userspace page tables.  This
+     feature of the MMU allows different processes to share TLB
+     entries mapping the kernel.  Losing the feature means more
+     TLB misses after a context switch.  The actual loss of
+     performance is very small, however, never exceeding 1%.
+  d. Process Context IDentifiers (PCID) is a CPU feature that
+     allows us to skip flushing the entire TLB when switching page
+     tables by setting a special bit in CR3 when the page tables
+     are changed.  This makes switching the page tables (at context
+     switch, or kernel entry/exit) cheaper.  But, on systems with
+     PCID support, the context switch code must flush both the user
+     and kernel entries out of the TLB.  The user PCID TLB flush is
+     deferred until the exit to userspace, minimizing the cost.
+     See intel.com/sdm for the gory PCID/INVPCID details.
+  e. The userspace page tables must be populated for each new
+     process.  Even without PTI, the shared kernel mappings
+     are created by copying top-level (PGD) entries into each
+     new process.  But, with PTI, there are now *two* kernel
+     mappings: one in the kernel page tables that maps everything
+     and one for the entry/exit structures.  At fork(), we need to
+     copy both.
+  f. In addition to the fork()-time copying, there must also
+     be an update to the userspace PGD any time a set_pgd() is done
+     on a PGD used to map userspace.  This ensures that the kernel
+     and userspace copies always map the same userspace
+     memory.
+  g. On systems without PCID support, each CR3 write flushes
+     the entire TLB.  That means that each syscall, interrupt
+     or exception flushes the TLB.
+  h. INVPCID is a TLB-flushing instruction which allows flushing
+     of TLB entries for non-current PCIDs.  Some systems support
+     PCIDs, but do not support INVPCID.  On these systems, addresses
+     can only be flushed from the TLB for the current PCID.  When
+     flushing a kernel address, we need to flush all PCIDs, so a
+     single kernel address flush will require a TLB-flushing CR3
+     write upon the next use of every PCID.
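The deferred user-PCID flush in (d) can be modeled as below; every name is
illustrative rather than a real kernel symbol, and the real kernel keeps
the pending-flush state per-CPU and per-ASID:

	#include <stdbool.h>

	static bool user_pcid_flush_pending;	/* per-CPU in the real kernel */

	static void stub_flush_kernel_tlb(void) { /* flushing CR3 write */ }
	static void stub_flush_user_pcid(void)  { /* INVPCID or CR3 write */ }

	static void on_kernel_tlb_flush(void)
	{
		stub_flush_kernel_tlb();	/* kernel entries: flush now */
		user_pcid_flush_pending = true;	/* user entries: defer */
	}

	static void on_exit_to_userspace(void)
	{
		if (user_pcid_flush_pending) {
			user_pcid_flush_pending = false;
			stub_flush_user_pcid();
		}
	}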
+
+Possible Future Work
+====================
+1. We can be more careful about not actually writing to CR3
+   unless its value is actually changed.
+2. Allow PTI to be enabled/disabled at runtime in addition to the
+   boot-time switching.
+
+Testing
+========
+
+To test stability of PTI, the following test procedure is recommended,
+ideally doing all of these in parallel:
+
+1. Set CONFIG_DEBUG_ENTRY=y
+2. Run several copies of all of the tools/testing/selftests/x86/ tests
+   (excluding MPX and protection_keys) in a loop on multiple CPUs for
+   several minutes.  These tests frequently uncover corner cases in the
+   kernel entry code.  In general, old kernels might cause these tests
+   themselves to crash, but they should never crash the kernel.
+3. Run the 'perf' tool in a mode (top or record) that generates many
+   frequent performance monitoring non-maskable interrupts (see "NMI"
+   in /proc/interrupts).  This exercises the NMI entry/exit code which
+   is known to trigger bugs in code paths that did not expect to be
+   interrupted, including nested NMIs.  Using "-c" boosts the rate of
+   NMIs, and using two -c with separate counters encourages nested NMIs
+   and less deterministic behavior.
+
+       while true; do perf record -c 10000 -e instructions,cycles -a sleep 10; done
+
+4. Launch a KVM virtual machine.
+5. Run 32-bit binaries on systems supporting the SYSCALL instruction.
+   This has been a lightly-tested code path and needs extra scrutiny.
+
+Debugging
+=========
+
+Bugs in PTI cause a few different signatures of crashes
+that are worth noting here.
+
+ * Failures of the selftests/x86 code.  Usually a bug in one of the
+   more obscure corners of entry_64.S
+ * Crashes in early boot, especially around CPU bringup.  Bugs
+   in the trampoline code or mappings cause these.
+ * Crashes at the first interrupt.  Caused by bugs in entry_64.S,
+   like screwing up a page table switch.  Also caused by
+   incorrectly mapping the IRQ handler entry code.
+ * Crashes at the first NMI.  The NMI code is separate from main
+   interrupt handlers and can have bugs that do not affect
+   normal interrupts.  Also caused by incorrectly mapping NMI
+   code.  NMIs that interrupt the entry code must be very
+   careful and can be the cause of crashes that show up when
+   running perf.
+ * Kernel crashes at the first exit to userspace.  entry_64.S
+   bugs, or failing to map some of the exit code.
+ * Crashes at first interrupt that interrupts userspace. The paths
+   in entry_64.S that return to userspace are sometimes separate
+   from the ones that return to the kernel.
+ * Double faults: overflowing the kernel stack because of page
+   faults upon page faults.  Caused by touching non-pti-mapped
+   data in the entry code, or forgetting to switch to kernel
+   CR3 before calling into C functions which are not pti-mapped.
+ * Userspace segfaults early in boot, sometimes manifesting
+   as mount(8) failing to mount the rootfs.  These have
+   tended to be TLB invalidation issues.  Usually invalidating
+   the wrong PCID, or otherwise missing an invalidation.
+
+1. https://gruss.cc/files/kaiser.pdf
+2. https://meltdownattack.com/meltdown.pdf
index 3448e675b4623ce81b5e0bc1116c52a12c411801..ea91cb61a60297ac658a4160cee200da75e6f00d 100644 (file)
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,17 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+                                   vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -29,26 +31,31 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
+... unused hole ...
+                                   vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes a hole between user space and kernel addresses if you interpret
+them as unsigned.
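A small standalone C demonstration of that sign extension, assuming 48
implemented bits (4-level paging):

	#include <stdint.h>
	#include <stdio.h>

	/* Sign-extend bit 47 into bits 48..63, as required for canonical
	 * 48-bit virtual addresses. */
	static uint64_t sign_extend48(uint64_t va)
	{
		return (uint64_t)(((int64_t)va << 16) >> 16);
	}

	int main(void)
	{
		/* First address above user space (0x00007fffffffffff): */
		printf("%#llx\n",
		       (unsigned long long)sign_extend48(0x0000800000000000ULL));
		/* Prints 0xffff800000000000 -- everything in between is the
		 * non-canonical hole. */
		return 0;
	}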
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,19 +65,15 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64GB large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
-The module mapping space size changes based on the CONFIG requirements for the
-following fixmap section.
-
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
 
--Andi Kleen, Jul 2004
+Be very careful with respect to KASLR when changing anything here. The KASLR
+address range must not overlap with anything except the KASAN shadow area,
+which is correct as KASAN disables KASLR.
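
To make the ordering guarantee concrete, a hypothetical sketch of
order-preserving randomization (the region table and names here are
illustrative, not the actual arch/x86/mm/kaslr.c code):

struct region {
	unsigned long *base;		/* direct map, vmalloc, vmemmap */
	unsigned long size_tb;
};

/* Each base is pushed up by a cumulative random offset, so bases move
 * but their relative order can never change. */
static void randomize_bases(struct region *r, int n,
			    unsigned long (*get_rand_offset)(void))
{
	unsigned long offset = 0;
	int i;

	for (i = 0; i < n; i++) {
		offset += get_rand_offset();
		*r[i].base += offset;
	}
}
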
index a6e86e20761e143ca976d4f8170e60b603bb5ed9..18994806e441f86f2415e417458a02dc111fa6d2 100644 (file)
@@ -2621,24 +2621,22 @@ F:      fs/bfs/
 F:     include/uapi/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-M:     Steven Miao <realmz6@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:     git git://git.code.sf.net/p/adi-linux/code
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     arch/blackfin/
 
 BLACKFIN EMAC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/net/ethernet/adi/
 
 BLACKFIN MEDIA DRIVER
-M:     Scott Jiang <scott.jiang.linux@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org/
-S:     Supported
+S:     Orphan
 F:     drivers/media/platform/blackfin/
 F:     drivers/media/i2c/adv7183*
 F:     drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F:      drivers/media/i2c/vs6624*
 BLACKFIN RTC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/rtc/rtc-bfin.c
 
 BLACKFIN SDH DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/mmc/host/bfin_sdh.c
 
 BLACKFIN SERIAL DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/tty/serial/bfin_uart.c
 
 BLACKFIN WATCHDOG DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/watchdog/bfin_wdt.c
 
 BLINKM RGB LED DRIVER
@@ -5151,15 +5149,15 @@ F:      sound/usb/misc/ua101.c
 EFI TEST DRIVER
 L:     linux-efi@vger.kernel.org
 M:     Ivan Hu <ivan.hu@canonical.com>
-M:     Matt Fleming <matt@codeblueprint.co.uk>
+M:     Ard Biesheuvel <ard.biesheuvel@linaro.org>
 S:     Maintained
 F:     drivers/firmware/efi/test/
 
 EFI VARIABLE FILESYSTEM
 M:     Matthew Garrett <matthew.garrett@nebula.com>
 M:     Jeremy Kerr <jk@ozlabs.org>
-M:     Matt Fleming <matt@codeblueprint.co.uk>
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
+M:     Ard Biesheuvel <ard.biesheuvel@linaro.org>
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 L:     linux-efi@vger.kernel.org
 S:     Maintained
 F:     fs/efivarfs/
@@ -5320,7 +5318,6 @@ S:        Supported
 F:     security/integrity/evm/
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
-M:     Matt Fleming <matt@codeblueprint.co.uk>
 M:     Ard Biesheuvel <ard.biesheuvel@linaro.org>
 L:     linux-efi@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
@@ -9641,8 +9638,8 @@ F:        include/uapi/linux/sunrpc/
 NILFS2 FILESYSTEM
 M:     Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
 L:     linux-nilfs@vger.kernel.org
-W:     http://nilfs.sourceforge.net/
-W:     http://nilfs.osdn.jp/
+W:     https://nilfs.sourceforge.io/
+W:     https://nilfs.osdn.jp/
 T:     git git://github.com/konis/nilfs2.git
 S:     Supported
 F:     Documentation/filesystems/nilfs2.txt
@@ -10137,7 +10134,7 @@ F:      drivers/irqchip/irq-ompic.c
 F:     drivers/irqchip/irq-or1k-*
 
 OPENVSWITCH
-M:     Pravin Shelar <pshelar@nicira.com>
+M:     Pravin B Shelar <pshelar@ovn.org>
 L:     netdev@vger.kernel.org
 L:     dev@openvswitch.org
 W:     http://openvswitch.org
@@ -13493,6 +13490,7 @@ M:      Mika Westerberg <mika.westerberg@linux.intel.com>
 M:     Yehezkel Bernat <yehezkel.bernat@intel.com>
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
 S:     Maintained
+F:     Documentation/admin-guide/thunderbolt.rst
 F:     drivers/thunderbolt/
 F:     include/linux/thunderbolt.h
 
index 7e02f951b284187d5354c2b7bd39b0ef1bf5d903..bf5b8cbb9469db3ab4a42e636746cab82a093044 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc8
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -484,26 +484,6 @@ CLANG_GCC_TC       := --gcc-toolchain=$(GCC_TOOLCHAIN)
 endif
 KBUILD_CFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
 KBUILD_AFLAGS += $(CLANG_TARGET) $(CLANG_GCC_TC)
-KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
-KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
-# Quiet clang warning: comparison of unsigned expression < 0 is always false
-KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
-# CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
-# source of a reference will be _MergedGlobals and not on of the whitelisted names.
-# See modpost pattern 2
-KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
-KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
-KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
-KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
-else
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
 endif
 
 ifeq ($(config-targets),1)
@@ -716,6 +696,29 @@ ifdef CONFIG_CC_STACKPROTECTOR
 endif
 KBUILD_CFLAGS += $(stackp-flag)
 
+ifeq ($(cc-name),clang)
+KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
+KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+# Quiet clang warning: comparison of unsigned expression < 0 is always false
+KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
+# Clang uses _MergedGlobals as an optimization, but this breaks modpost, as the
+# source of a reference will be _MergedGlobals and not one of the whitelisted names.
+# See modpost pattern 2
+KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
+KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
+KBUILD_CFLAGS += $(call cc-option, -no-integrated-as)
+KBUILD_AFLAGS += $(call cc-option, -no-integrated-as)
+else
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+endif
+
 ifdef CONFIG_FRAME_POINTER
 KBUILD_CFLAGS  += -fno-omit-frame-pointer -fno-optimize-sibling-calls
 else
@@ -789,6 +792,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like Gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
index 4e6e9f57e790ac0cefcade55664124c9351e830e..dc91c663bcc02e2cdc116f40ad31757d711ac485 100644 (file)
                        reg = <0x80 0x10>, <0x100 0x10>;
                        #clock-cells = <0>;
                        clocks = <&input_clk>;
+
+                       /*
+                        * Set initial core pll output frequency to 90MHz.
+                        * It will be applied when the core pll driver is
+                        * probed during early boot.
+                        */
+                       assigned-clocks = <&core_clk>;
+                       assigned-clock-rates = <90000000>;
                };
 
                core_intc: archs-intc@cpu {
index 63954a8b0100ebf5746394fedb62b4825048d903..69ff4895f2ba4b558f2bdfed547ef0ec27288174 100644 (file)
                        reg = <0x80 0x10>, <0x100 0x10>;
                        #clock-cells = <0>;
                        clocks = <&input_clk>;
+
+                       /*
+                        * Set initial core pll output frequency to 100MHz.
+                        * It will be applied when the core pll driver is
+                        * probed during early boot.
+                        */
+                       assigned-clocks = <&core_clk>;
+                       assigned-clock-rates = <100000000>;
                };
 
                core_intc: archs-intc@cpu {
index 8f627c200d609148c55731aac99b2a353e72f126..006aa3de5348f31c7462f52f173ad2e74434d062 100644 (file)
                        reg = <0x00 0x10>, <0x14B8 0x4>;
                        #clock-cells = <0>;
                        clocks = <&input_clk>;
+
+                       /*
+                        * Set initial core pll output frequency to 1GHz.
+                        * It will be applied when the core pll driver is
+                        * probed during early boot.
+                        */
+                       assigned-clocks = <&core_clk>;
+                       assigned-clock-rates = <1000000000>;
                };
 
                serial: serial@5000 {
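
The assigned-clocks/assigned-clock-rates properties above are consumed
generically by the common clock framework at probe/registration time; a
simplified, hedged C sketch of that mechanism (the real implementation is
drivers/clk/clk-conf.c, whose details may differ):

#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/of.h>

/* For each assigned-clocks[i]/assigned-clock-rates[i] pair, look the
 * clock up and set its rate once, at probe time. */
static void apply_assigned_rates(struct device_node *np)
{
	struct of_phandle_args clkspec;
	struct clk *clk;
	u32 rate;
	int i;

	for (i = 0;
	     !of_property_read_u32_index(np, "assigned-clock-rates",
					 i, &rate);
	     i++) {
		if (!rate)
			continue;
		if (of_parse_phandle_with_args(np, "assigned-clocks",
					       "#clock-cells", i, &clkspec))
			continue;
		clk = of_clk_get_from_provider(&clkspec);
		if (!IS_ERR(clk))
			clk_set_rate(clk, rate);
	}
}
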
index 7b8f8faf8a24315d3379d189cab69506539e04a8..ac6b0ed8341eeff9afdef2544fa48aaac5641deb 100644 (file)
@@ -49,10 +49,11 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
+CONFIG_DRM=y
+# CONFIG_DRM_FBDEV_EMULATION is not set
+CONFIG_DRM_UDL=y
 CONFIG_FB=y
-CONFIG_FB_UDL=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_USB=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_OHCI_HCD=y
index f35974ee7264a1e13c3fb82b19dc2cc84b9fcc99..c9173c02081c0c3c81e136e3ae226562f5505b8e 100644 (file)
@@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
                return 0;
 
        __asm__ __volatile__(
+       "       mov     lp_count, %5            \n"
        "       lp      3f                      \n"
        "1:     ldb.ab  %3, [%2, 1]             \n"
        "       breq.d  %3, 0, 3f               \n"
@@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
        "       .word   1b, 4b                  \n"
        "       .previous                       \n"
        : "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
-       : "g"(-EFAULT), "l"(count)
-       : "memory");
+       : "g"(-EFAULT), "r"(count)
+       : "lp_count", "lp_start", "lp_end", "memory");
 
        return res;
 }
index 7ef7d9a8ff89231811e73a241a3a3c6d248e720b..9d27331fe69a0eb441b34e51324138ef375070b0 100644 (file)
@@ -199,7 +199,7 @@ static void read_arc_build_cfg_regs(void)
                        unsigned int exec_ctrl;
 
                        READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-                       cpu->extn.dual_enb = exec_ctrl & 1;
+                       cpu->extn.dual_enb = !(exec_ctrl & 1);
 
                        /* dual issue always present for this core */
                        cpu->extn.dual = 1;
index 74315f302971b1989b305b5c8a3ad14517a97cc1..bf40e06f3fb84fec42cefe779d92b18829524af6 100644 (file)
@@ -163,7 +163,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
  */
 static int __print_sym(unsigned int address, void *unused)
 {
-       __print_symbol("  %s\n", address);
+       printk("  %pS\n", (void *)address);
        return 0;
 }
 
index bcd7c9fc5d0fc9868c04ff98d08b2e1492d50bb8..133a4dae41fe7d73b07b0d2365a899b943bc4779 100644 (file)
@@ -83,6 +83,7 @@ DO_ERROR_INFO(SIGILL, "Illegal Insn (or Seq)", insterror_is_error, ILL_ILLOPC)
 DO_ERROR_INFO(SIGBUS, "Invalid Mem Access", __weak do_memory_error, BUS_ADRERR)
 DO_ERROR_INFO(SIGTRAP, "Breakpoint Set", trap_is_brkpt, TRAP_BRKPT)
 DO_ERROR_INFO(SIGBUS, "Misaligned Access", do_misaligned_error, BUS_ADRALN)
+DO_ERROR_INFO(SIGSEGV, "gcc generated __builtin_trap", do_trap5_error, 0)
 
 /*
  * Entry Point for Misaligned Data access Exception, for emulating in software
@@ -115,6 +116,8 @@ void do_machine_check_fault(unsigned long address, struct pt_regs *regs)
  * Thus TRAP_S <n> can be used for specific purpose
  *  -1 used for software breakpointing (gdb)
  *  -2 used by kprobes
+ *  -5 __builtin_trap() generated by gcc (2018.03 onwards) for toggles such as
+ *     -fno-isolate-erroneous-paths-dereference
  */
 void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
 {
@@ -134,6 +137,9 @@ void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
                kgdb_trap(regs);
                break;
 
+       case 5:
+               do_trap5_error(address, regs);
+               break;
        default:
                break;
        }
@@ -155,3 +161,11 @@ void do_insterror_or_kprobe(unsigned long address, struct pt_regs *regs)
 
        insterror_is_error(address, regs);
 }
+
+/*
+ * abort() call generated by older gcc for __builtin_trap()
+ */
+void abort(void)
+{
+       __asm__ __volatile__("trap_s  5\n");
+}
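
An illustrative function (not kernel code) showing the two compiler
behaviours the hunks above cater for:

/* With -fisolate-erroneous-paths-dereference (the gcc default), a
 * provable NULL dereference is replaced by __builtin_trap(): ARC gcc
 * 2018.03+ emits "trap_s 5" (caught as TRAP param 5 above), while
 * older gcc emits a call to abort(), which the stub above routes to
 * the very same trap. */
void example(int *p)
{
	if (!p)
		__builtin_trap();	/* trap_s 5, or abort() on old gcc */
	*p = 1;
}
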
index 7d8c1d6c2f60f918e95f2cafc81c90c028684b9a..6e9a0a9a6a04e1794ca0bbe6ec75dc5ab0617352 100644 (file)
@@ -163,6 +163,9 @@ static void show_ecr_verbose(struct pt_regs *regs)
                else
                        pr_cont("Bus Error, check PRM\n");
 #endif
+       } else if (vec == ECR_V_TRAP) {
+               if (regs->ecr_param == 5)
+                       pr_cont("gcc generated __builtin_trap\n");
        } else {
                pr_cont("Check Programmer's Manual\n");
        }
index f1ac6790da5fe64782b59b720bf3ea80d999bff1..46544e88492d5273de4c6a121538fd4a6418ec65 100644 (file)
@@ -317,25 +317,23 @@ static void __init axs103_early_init(void)
         * Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
         * of fudging the freq in DT
         */
+#define AXS103_QUAD_CORE_CPU_FREQ_HZ   50000000
+
        unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
        if (num_cores > 2) {
-               u32 freq = 50, orig;
-               /*
-                * TODO: use cpu node "cpu-freq" param instead of platform-specific
-                * "/cpu_card/core_clk" as it works only if we use fixed-clock for cpu.
-                */
+               u32 freq;
                int off = fdt_path_offset(initial_boot_params, "/cpu_card/core_clk");
                const struct fdt_property *prop;
 
                prop = fdt_get_property(initial_boot_params, off,
-                                       "clock-frequency", NULL);
-               orig = be32_to_cpu(*(u32*)(prop->data)) / 1000000;
+                                       "assigned-clock-rates", NULL);
+               freq = be32_to_cpu(*(u32 *)(prop->data));
 
                /* Patching .dtb in-place with new core clock value */
-               if (freq != orig ) {
-                       freq = cpu_to_be32(freq * 1000000);
+               if (freq != AXS103_QUAD_CORE_CPU_FREQ_HZ) {
+                       freq = cpu_to_be32(AXS103_QUAD_CORE_CPU_FREQ_HZ);
                        fdt_setprop_inplace(initial_boot_params, off,
-                                           "clock-frequency", &freq, sizeof(freq));
+                                           "assigned-clock-rates", &freq, sizeof(freq));
                }
        }
 #endif
index fd0ae5e38639a8756c86d7882c6e74c88f0e07e5..2958aedb649ab183edcce1ca858006f67fd8ff21 100644 (file)
@@ -38,42 +38,6 @@ static void __init hsdk_init_per_cpu(unsigned int cpu)
 #define CREG_PAE               (CREG_BASE + 0x180)
 #define CREG_PAE_UPDATE                (CREG_BASE + 0x194)
 
-#define CREG_CORE_IF_CLK_DIV   (CREG_BASE + 0x4B8)
-#define CREG_CORE_IF_CLK_DIV_2 0x1
-#define CGU_BASE               ARC_PERIPHERAL_BASE
-#define CGU_PLL_STATUS         (ARC_PERIPHERAL_BASE + 0x4)
-#define CGU_PLL_CTRL           (ARC_PERIPHERAL_BASE + 0x0)
-#define CGU_PLL_STATUS_LOCK    BIT(0)
-#define CGU_PLL_STATUS_ERR     BIT(1)
-#define CGU_PLL_CTRL_1GHZ      0x3A10
-#define HSDK_PLL_LOCK_TIMEOUT  500
-
-#define HSDK_PLL_LOCKED() \
-       !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_LOCK)
-
-#define HSDK_PLL_ERR() \
-       !!(ioread32((void __iomem *) CGU_PLL_STATUS) & CGU_PLL_STATUS_ERR)
-
-static void __init hsdk_set_cpu_freq_1ghz(void)
-{
-       u32 timeout = HSDK_PLL_LOCK_TIMEOUT;
-
-       /*
-        * As we set cpu clock which exceeds 500MHz, the divider for the interface
-        * clock must be programmed to div-by-2.
-        */
-       iowrite32(CREG_CORE_IF_CLK_DIV_2, (void __iomem *) CREG_CORE_IF_CLK_DIV);
-
-       /* Set cpu clock to 1GHz */
-       iowrite32(CGU_PLL_CTRL_1GHZ, (void __iomem *) CGU_PLL_CTRL);
-
-       while (!HSDK_PLL_LOCKED() && timeout--)
-               cpu_relax();
-
-       if (!HSDK_PLL_LOCKED() || HSDK_PLL_ERR())
-               pr_err("Failed to setup CPU frequency to 1GHz!");
-}
-
 #define SDIO_BASE              (ARC_PERIPHERAL_BASE + 0xA000)
 #define SDIO_UHS_REG_EXT       (SDIO_BASE + 0x108)
 #define SDIO_UHS_REG_EXT_DIV_2 (2 << 30)
@@ -98,12 +62,6 @@ static void __init hsdk_init_early(void)
         * minimum possible div-by-2.
         */
        iowrite32(SDIO_UHS_REG_EXT_DIV_2, (void __iomem *) SDIO_UHS_REG_EXT);
-
-       /*
-        * Setup CPU frequency to 1GHz.
-        * TODO: remove it after smart hsdk pll driver will be introduced.
-        */
-       hsdk_set_cpu_freq_1ghz();
 }
 
 static const char *hsdk_compat[] __initconst = {
index 45d815a86d420b9f8f108919bb846f865f9b5231..de08d9045cb85bd83884b8a7c05d78296034a94b 100644 (file)
                                compatible = "aspeed,ast2400-vuart";
                                reg = <0x1e787000 0x40>;
                                reg-shift = <2>;
-                               interrupts = <10>;
+                               interrupts = <8>;
                                clocks = <&clk_uart>;
                                no-loopback-test;
                                status = "disabled";
index 5f29010cdbd8129d57bd2141164f4fdd4e58cc5b..9b82cc8843e1af8df22ee12d4146cd87c725c563 100644 (file)
        jc42@18 {
                compatible = "nxp,se97b", "jedec,jc-42.4-temp";
                reg = <0x18>;
+               smbus-timeout-disable;
        };
 
        dpot: mcp4651-104@28 {
index 413dbd5d9f6442b8dae4b7f132b35e5f816cf798..81942ae83e1f9c9f717dd0fd565a28966c44c1da 100644 (file)
         */
        battery {
                pinctrl-names = "default";
-               pintctrl-0 = <&battery_pins>;
+               pinctrl-0 = <&battery_pins>;
                compatible = "lego,ev3-battery";
                io-channels = <&adc 4>, <&adc 3>;
                io-channel-names = "voltage", "current";
        batt_volt_en {
                gpio-hog;
                gpios = <6 GPIO_ACTIVE_HIGH>;
-               output-low;
+               output-high;
        };
 };
 
index b2b95ff205e81ba9248f3f29416001459e3c3d77..0029ec27819ca361024bfdaccb43b1faededa600 100644 (file)
        status = "okay";
 };
 
+&mixer {
+       status = "okay";
+};
+
 /* eMMC flash */
 &mmc_0 {
        status = "okay";
index 940875316d0f3926b39ea0717e560288f8594084..67b4de0e343921fda48d7e94b228732fe2d0131d 100644 (file)
                                reg = <0x2a>;
                                VDDA-supply = <&reg_3p3v>;
                                VDDIO-supply = <&reg_3p3v>;
-                               clocks = <&sys_mclk 1>;
+                               clocks = <&sys_mclk>;
                        };
                };
        };
index a8b148ad1dd2c37076a6a89d7c02498064fabc15..44715c8ef756b9f44bdc7a7c69b0cdc12c26d93d 100644 (file)
                reg = <0x0a>;
                VDDA-supply = <&reg_3p3v>;
                VDDIO-supply = <&reg_3p3v>;
-               clocks = <&sys_mclk 1>;
+               clocks = <&sys_mclk>;
        };
 };
 
index c6d92c25df42d5a673e991df58f86f566df73679..d23ee6d911acf55cb9330b66330966c33816ac7f 100644 (file)
        };
 };
 
+&cpu0 {
+       cpu0-supply = <&vdd_arm>;
+};
+
 &i2c1 {
        status = "okay";
        clock-frequency = <400000>;
index cd24894ee5c6b14deab21c543bad7c62ec34c93d..6102e4e7f35c15bcb809152453ef3ed82dd579d7 100644 (file)
        iep_mmu: iommu@ff900800 {
                compatible = "rockchip,iommu";
                reg = <0x0 0xff900800 0x0 0x40>;
-               interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH 0>;
+               interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "iep_mmu";
                #iommu-cells = <0>;
                status = "disabled";
index b91300d49a31081269789a6e7437e6d707a8a6f4..5840f5c75c3b388d47242c53aa0c0f7fbe3e3a68 100644 (file)
                        reg = <0x01c16000 0x1000>;
                        interrupts = <58>;
                        clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-                                <&ccu 9>,
-                                <&ccu 18>;
+                                <&ccu CLK_PLL_VIDEO0_2X>,
+                                <&ccu CLK_PLL_VIDEO1_2X>;
                        clock-names = "ahb", "mod", "pll-0", "pll-1";
                        dmas = <&dma SUN4I_DMA_NORMAL 16>,
                               <&dma SUN4I_DMA_NORMAL 16>,
index 6ae4d95e230e58a468c908d85c2cfaf8d87894a0..316cb8b2945b114224d2c75ffa65a3d6a07f3300 100644 (file)
@@ -82,8 +82,8 @@
                        reg = <0x01c16000 0x1000>;
                        interrupts = <58>;
                        clocks = <&ccu CLK_AHB_HDMI>, <&ccu CLK_HDMI>,
-                                <&ccu 9>,
-                                <&ccu 16>;
+                                <&ccu CLK_PLL_VIDEO0_2X>,
+                                <&ccu CLK_PLL_VIDEO1_2X>;
                        clock-names = "ahb", "mod", "pll-0", "pll-1";
                        dmas = <&dma SUN4I_DMA_NORMAL 16>,
                               <&dma SUN4I_DMA_NORMAL 16>,
index 8bfa12b548e0a2acf8f285505cd3699e27519e86..72d3fe44ecaf0d53f16b28b6433e9853d1a13c2f 100644 (file)
                        interrupts = <GIC_SPI 88 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_AHB1_HDMI>, <&ccu CLK_HDMI>,
                                 <&ccu CLK_HDMI_DDC>,
-                                <&ccu 7>,
-                                <&ccu 13>;
+                                <&ccu CLK_PLL_VIDEO0_2X>,
+                                <&ccu CLK_PLL_VIDEO1_2X>;
                        clock-names = "ahb", "mod", "ddc", "pll-0", "pll-1";
                        resets = <&ccu RST_AHB1_HDMI>;
                        reset-names = "ahb";
index 68dfa82544fc4c574916c3d60c17a60097ddb2b1..59655e42e4b09a75edc0b285e2c18fd4926e21aa 100644 (file)
                        reg = <0x01c16000 0x1000>;
                        interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&ccu CLK_AHB_HDMI0>, <&ccu CLK_HDMI>,
-                                <&ccu 9>,
-                                <&ccu 18>;
+                                <&ccu CLK_PLL_VIDEO0_2X>,
+                                <&ccu CLK_PLL_VIDEO1_2X>;
                        clock-names = "ahb", "mod", "pll-0", "pll-1";
                        dmas = <&dma SUN4I_DMA_NORMAL 16>,
                               <&dma SUN4I_DMA_NORMAL 16>,
index 98715538932f10bd048b355b4f7bb5bfc183a7cd..a021ee6da3968c252a02aefb29c3b99a2e13a096 100644 (file)
        status = "okay";
 
        axp81x: pmic@3a3 {
+               compatible = "x-powers,axp813";
                reg = <0x3a3>;
                interrupt-parent = <&r_intc>;
                interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
index 0ec1b0a317b4c4631d4cfe5d9d451e9d16c697de..ff72a8efb73d05e3d219587654b96987ab1b31e5 100644 (file)
                        reg = <0x6e000 0x400>;
                        ranges = <0 0x6e000 0x400>;
                        interrupt-parent = <&gic>;
-                       interrupt-controller;
                        #address-cells = <1>;
                        #size-cells = <1>;
 
index 5cf04888c581df4a21053a826f8a2fdd58204435..3e26c6f7a191a9621a5234598920bb43f260bbbb 100644 (file)
@@ -793,7 +793,6 @@ void abort(void)
        /* if that doesn't kill us, halt */
        panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644 (file)
                .pushsection .text.fixup,"ax"
                .align  4
 9001:          mov     r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+               ldr     r5, [sp, #9*4]          @ *err_ptr
+#else
                ldr     r5, [sp, #8*4]          @ *err_ptr
+#endif
                str     r4, [r5]
                ldmia   sp, {r1, r2}            @ retrieve dst, len
                add     r2, r2, r1
index 8be04ec95adf5e4d0969c9c1cc1460e4e31c9caf..5ace9380626a0cc34f074e71a2981c8c42e87c50 100644 (file)
@@ -868,10 +868,10 @@ static const struct dma_slave_map dm365_edma_map[] = {
        { "spi_davinci.0", "rx", EDMA_FILTER_PARAM(0, 17) },
        { "spi_davinci.3", "tx", EDMA_FILTER_PARAM(0, 18) },
        { "spi_davinci.3", "rx", EDMA_FILTER_PARAM(0, 19) },
-       { "dm6441-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
-       { "dm6441-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
-       { "dm6441-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
-       { "dm6441-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
+       { "da830-mmc.0", "rx", EDMA_FILTER_PARAM(0, 26) },
+       { "da830-mmc.0", "tx", EDMA_FILTER_PARAM(0, 27) },
+       { "da830-mmc.1", "rx", EDMA_FILTER_PARAM(0, 30) },
+       { "da830-mmc.1", "tx", EDMA_FILTER_PARAM(0, 31) },
 };
 
 static struct edma_soc_info dm365_edma_pdata = {
@@ -925,12 +925,14 @@ static struct resource edma_resources[] = {
        /* not using TC*_ERR */
 };
 
-static struct platform_device dm365_edma_device = {
-       .name                   = "edma",
-       .id                     = 0,
-       .dev.platform_data      = &dm365_edma_pdata,
-       .num_resources          = ARRAY_SIZE(edma_resources),
-       .resource               = edma_resources,
+static const struct platform_device_info dm365_edma_device __initconst = {
+       .name           = "edma",
+       .id             = 0,
+       .dma_mask       = DMA_BIT_MASK(32),
+       .res            = edma_resources,
+       .num_res        = ARRAY_SIZE(edma_resources),
+       .data           = &dm365_edma_pdata,
+       .size_data      = sizeof(dm365_edma_pdata),
 };
 
 static struct resource dm365_asp_resources[] = {
@@ -1428,13 +1430,18 @@ int __init dm365_init_video(struct vpfe_config *vpfe_cfg,
 
 static int __init dm365_init_devices(void)
 {
+       struct platform_device *edma_pdev;
        int ret = 0;
 
        if (!cpu_is_davinci_dm365())
                return 0;
 
        davinci_cfg_reg(DM365_INT_EDMA_CC);
-       platform_device_register(&dm365_edma_device);
+       edma_pdev = platform_device_register_full(&dm365_edma_device);
+       if (IS_ERR(edma_pdev)) {
+               pr_warn("%s: Failed to register eDMA\n", __func__);
+               return PTR_ERR(edma_pdev);
+       }
 
        platform_device_register(&dm365_mdio_device);
        platform_device_register(&dm365_emac_device);
index 45bdbfb961261becf2fb940fb9da19ee0f99560a..4a8d3f83a36eabc50134577306c06febffb9bed8 100644 (file)
@@ -75,6 +75,7 @@
        pinctrl-0 = <&rgmii_pins>;
        phy-mode = "rgmii";
        phy-handle = <&ext_rgmii_phy>;
+       phy-supply = <&reg_dc1sw>;
        status = "okay";
 };
 
index 806442d3e846881d01e3c971da41af1db869b14f..604cdaedac38eed479a8f24a88efbc4fddc2676e 100644 (file)
@@ -77,6 +77,7 @@
        pinctrl-0 = <&rmii_pins>;
        phy-mode = "rmii";
        phy-handle = <&ext_rmii_phy1>;
+       phy-supply = <&reg_dc1sw>;
        status = "okay";
 
 };
index 0eb2acedf8c3bc5ff2b4bbde4cddfde4d8c204ad..abe179de35d780cc1d3691394d465817dad30549 100644 (file)
@@ -82,6 +82,7 @@
        pinctrl-0 = <&rgmii_pins>;
        phy-mode = "rgmii";
        phy-handle = <&ext_rgmii_phy>;
+       phy-supply = <&reg_dc1sw>;
        status = "okay";
 };
 
@@ -95,7 +96,7 @@
 &mmc2 {
        pinctrl-names = "default";
        pinctrl-0 = <&mmc2_pins>;
-       vmmc-supply = <&reg_vcc3v3>;
+       vmmc-supply = <&reg_dcdc1>;
        vqmmc-supply = <&reg_vcc1v8>;
        bus-width = <8>;
        non-removable;
index a5da18a6f2866d34537c55dcc5d3288daf6c8331..43418bd881d81e73da7634fa6ff62c11d64b65ef 100644 (file)
 
 #include "sun50i-a64.dtsi"
 
-/ {
-       reg_vcc3v3: vcc3v3 {
-               compatible = "regulator-fixed";
-               regulator-name = "vcc3v3";
-               regulator-min-microvolt = <3300000>;
-               regulator-max-microvolt = <3300000>;
-       };
-};
-
 &mmc0 {
        pinctrl-names = "default";
        pinctrl-0 = <&mmc0_pins>;
-       vmmc-supply = <&reg_vcc3v3>;
+       vmmc-supply = <&reg_dcdc1>;
        non-removable;
        disable-wp;
        bus-width = <4>;
index b6b7a561df8c91d90c57bd2d36cbaba37283bb02..a42fd79a62a30695221375b9a18d0209c29d73dd 100644 (file)
@@ -71,7 +71,7 @@
        pinctrl-0 = <&mmc0_pins_a>, <&mmc0_cd_pin>;
        vmmc-supply = <&reg_vcc3v3>;
        bus-width = <4>;
-       cd-gpios = <&pio 5 6 GPIO_ACTIVE_HIGH>;
+       cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
        status = "okay";
 };
 
index a298df74ca6c037f373f78f5fd310a4d8e8c0e64..dbe2648649db1e1f74bfb7b2bc97f9765658f7d9 100644 (file)
 &avb {
        pinctrl-0 = <&avb_pins>;
        pinctrl-names = "default";
-       renesas,no-ether-link;
        phy-handle = <&phy0>;
        status = "okay";
 
index 0d85b315ce711c8f0d896d7861204036ce9b0ec3..73439cf4865964016c14d2237c2f504d3ddfa430 100644 (file)
 &avb {
        pinctrl-0 = <&avb_pins>;
        pinctrl-names = "default";
-       renesas,no-ether-link;
        phy-handle = <&phy0>;
        status = "okay";
 
index d4f80786e7c20c0a46bacb9636f8defc7ef137b4..3890468678ce1caa78a411aeb0a4a9cdedfc1302 100644 (file)
        assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>;
        assigned-clock-parents = <&gmac_clkin>, <&gmac_clkin>;
        clock_in_out = "input";
+       /* shows instability at 1GBit right now */
+       max-speed = <100>;
        phy-supply = <&vcc_io>;
        phy-mode = "rgmii";
        pinctrl-names = "default";
index 41d61840fb99ce52ec553c94e119ab63bb79cdbe..2426da6319382dbcab52f286860aabd8651087cd 100644 (file)
        tsadc: tsadc@ff250000 {
                compatible = "rockchip,rk3328-tsadc";
                reg = <0x0 0xff250000 0x0 0x100>;
-               interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH 0>;
+               interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>;
                assigned-clocks = <&cru SCLK_TSADC>;
                assigned-clock-rates = <50000>;
                clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
index 910628d18add07d9a39974bc6ce2ac4a403adb81..1fc5060d7027e6e4b852c6cd71d430d2e7ab3990 100644 (file)
                regulator-min-microvolt = <5000000>;
                regulator-max-microvolt = <5000000>;
        };
-
-       vdd_log: vdd-log {
-               compatible = "pwm-regulator";
-               pwms = <&pwm2 0 25000 0>;
-               regulator-name = "vdd_log";
-               regulator-min-microvolt = <800000>;
-               regulator-max-microvolt = <1400000>;
-               regulator-always-on;
-               regulator-boot-on;
-               status = "okay";
-       };
 };
 
 &cpu_b0 {
index 48e733136db4580ffd06963f622b7ce7ada92d61..0ac2ace824350185c0e53369b6702875f1274ea0 100644 (file)
                        gpio-controller;
                        #gpio-cells = <2>;
                        gpio-ranges = <&pinctrl 0 0 0>,
-                                     <&pinctrl 96 0 0>,
-                                     <&pinctrl 160 0 0>;
+                                     <&pinctrl 104 0 0>,
+                                     <&pinctrl 168 0 0>;
                        gpio-ranges-group-names = "gpio_range0",
                                                  "gpio_range1",
                                                  "gpio_range2";
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644 (file)
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
        u64 reg;
 
+       /* Clear pmscr in case of early return */
+       *pmscr_el1 = 0;
+
        /* SPE present on this CPU? */
        if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
                                                  ID_AA64DFR0_PMSVER_SHIFT))
index c6ecb97151a25774bf75ce5a1663210db4f6bce3..9025699049ca63b3834ed917d935224009d2d7a8 100644 (file)
@@ -88,7 +88,7 @@ void vtime_flush(struct task_struct *tsk)
        }
 
        if (ti->softirq_time) {
-               delta = cycle_to_nsec(ti->softirq_time));
+               delta = cycle_to_nsec(ti->softirq_time);
                account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ);
        }
 
index cb79fba79d4391dbe07cd517309f77cbc5b05506..b88a8dd149333d0a0227c28762ca904fbe268ed2 100644 (file)
@@ -122,7 +122,6 @@ void abort(void)
        /* if that doesn't kill us, halt */
        panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
index c7ed26029cbbcf0ed7458a145d490957a59abb4e..e68e6e04063a7e0d9f30c50ebac3b082ac7b8b88 100644 (file)
@@ -235,6 +235,7 @@ LEAF(mips_cps_core_init)
        has_mt  t0, 3f
 
        .set    push
+       .set    MIPS_ISA_LEVEL_RAW
        .set    mt
 
        /* Only allow 1 TC per VPE to execute... */
@@ -388,6 +389,7 @@ LEAF(mips_cps_boot_vpes)
 #elif defined(CONFIG_MIPS_MT)
 
        .set    push
+       .set    MIPS_ISA_LEVEL_RAW
        .set    mt
 
        /* If the core doesn't support MT then return */
index 45d0b6b037eeb6a985318df16df6f73a7f1dad9c..57028d49c202aeab53a1b4ccd5d21b3b3052cedb 100644 (file)
@@ -705,6 +705,18 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
        struct task_struct *t;
        int max_users;
 
+       /* If nothing to change, return right away, successfully.  */
+       if (value == mips_get_process_fp_mode(task))
+               return 0;
+
+       /* Only accept a mode change if 64-bit FP enabled for o32.  */
+       if (!IS_ENABLED(CONFIG_MIPS_O32_FP64_SUPPORT))
+               return -EOPNOTSUPP;
+
+       /* And only for o32 tasks.  */
+       if (IS_ENABLED(CONFIG_64BIT) && !test_thread_flag(TIF_32BIT_REGS))
+               return -EOPNOTSUPP;
+
        /* Check the value is valid */
        if (value & ~known_bits)
                return -EOPNOTSUPP;
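
These checks back the PR_SET_FP_MODE prctl; a hedged user-space sketch of
the behaviour the first hunk introduces (a no-op mode change now succeeds
even where o32 FP64 support is absent):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	int mode = prctl(PR_GET_FP_MODE);

	/* Re-requesting the current mode returns 0 right away;
	 * a real change may still fail with EOPNOTSUPP. */
	if (prctl(PR_SET_FP_MODE, mode))
		perror("PR_SET_FP_MODE");
	return 0;
}
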
index efbd8df8b6652e7a81baed64134858fd8b3d733a..0b23b1ad99e65f1e21d1810340f9dd306483b8d3 100644 (file)
@@ -419,63 +419,160 @@ static int gpr64_set(struct task_struct *target,
 
 #endif /* CONFIG_64BIT */
 
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * !CONFIG_CPU_HAS_MSA variant.  FP context's general register slots
+ * correspond 1:1 to buffer slots.  Only general registers are copied.
+ */
+static int fpr_get_fpa(struct task_struct *target,
+                      unsigned int *pos, unsigned int *count,
+                      void **kbuf, void __user **ubuf)
+{
+       return user_regset_copyout(pos, count, kbuf, ubuf,
+                                  &target->thread.fpu,
+                                  0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer,
+ * CONFIG_CPU_HAS_MSA variant.  Only lower 64 bits of FP context's
+ * general register slots are copied to buffer slots.  Only general
+ * registers are copied.
+ */
+static int fpr_get_msa(struct task_struct *target,
+                      unsigned int *pos, unsigned int *count,
+                      void **kbuf, void __user **ubuf)
+{
+       unsigned int i;
+       u64 fpr_val;
+       int err;
+
+       BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+       for (i = 0; i < NUM_FPU_REGS; i++) {
+               fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
+               err = user_regset_copyout(pos, count, kbuf, ubuf,
+                                         &fpr_val, i * sizeof(elf_fpreg_t),
+                                         (i + 1) * sizeof(elf_fpreg_t));
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Copy the floating-point context to the supplied NT_PRFPREG buffer.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ */
 static int fpr_get(struct task_struct *target,
                   const struct user_regset *regset,
                   unsigned int pos, unsigned int count,
                   void *kbuf, void __user *ubuf)
 {
-       unsigned i;
+       const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
        int err;
-       u64 fpr_val;
 
-       /* XXX fcr31  */
+       if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+               err = fpr_get_fpa(target, &pos, &count, &kbuf, &ubuf);
+       else
+               err = fpr_get_msa(target, &pos, &count, &kbuf, &ubuf);
+       if (err)
+               return err;
 
-       if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
-               return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                          &target->thread.fpu,
-                                          0, sizeof(elf_fpregset_t));
+       err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpu.fcr31,
+                                 fcr31_pos, fcr31_pos + sizeof(u32));
 
-       for (i = 0; i < NUM_FPU_REGS; i++) {
-               fpr_val = get_fpr64(&target->thread.fpu.fpr[i], 0);
-               err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                         &fpr_val, i * sizeof(elf_fpreg_t),
-                                         (i + 1) * sizeof(elf_fpreg_t));
+       return err;
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * !CONFIG_CPU_HAS_MSA variant.  Buffer slots correspond 1:1 to FP
+ * context's general register slots.  Only general registers are copied.
+ */
+static int fpr_set_fpa(struct task_struct *target,
+                      unsigned int *pos, unsigned int *count,
+                      const void **kbuf, const void __user **ubuf)
+{
+       return user_regset_copyin(pos, count, kbuf, ubuf,
+                                 &target->thread.fpu,
+                                 0, NUM_FPU_REGS * sizeof(elf_fpreg_t));
+}
+
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context,
+ * CONFIG_CPU_HAS_MSA variant.  Buffer slots are copied to lower 64
+ * bits only of FP context's general register slots.  Only general
+ * registers are copied.
+ */
+static int fpr_set_msa(struct task_struct *target,
+                      unsigned int *pos, unsigned int *count,
+                      const void **kbuf, const void __user **ubuf)
+{
+       unsigned int i;
+       u64 fpr_val;
+       int err;
+
+       BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
+       for (i = 0; i < NUM_FPU_REGS && *count > 0; i++) {
+               err = user_regset_copyin(pos, count, kbuf, ubuf,
+                                        &fpr_val, i * sizeof(elf_fpreg_t),
+                                        (i + 1) * sizeof(elf_fpreg_t));
                if (err)
                        return err;
+               set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
        }
 
        return 0;
 }
 
+/*
+ * Copy the supplied NT_PRFPREG buffer to the floating-point context.
+ * Choose the appropriate helper for general registers, and then copy
+ * the FCSR register separately.
+ *
+ * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
+ * which is supposed to have been guaranteed by the kernel before
+ * calling us, e.g. in `ptrace_regset'.  We enforce that requirement,
+ * so that we can safely avoid preinitializing temporaries for
+ * partial register writes.
+ */
 static int fpr_set(struct task_struct *target,
                   const struct user_regset *regset,
                   unsigned int pos, unsigned int count,
                   const void *kbuf, const void __user *ubuf)
 {
-       unsigned i;
+       const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+       u32 fcr31;
        int err;
-       u64 fpr_val;
 
-       /* XXX fcr31  */
+       BUG_ON(count % sizeof(elf_fpreg_t));
+
+       if (pos + count > sizeof(elf_fpregset_t))
+               return -EIO;
 
        init_fp_ctx(target);
 
-       if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
-               return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                         &target->thread.fpu,
-                                         0, sizeof(elf_fpregset_t));
+       if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
+               err = fpr_set_fpa(target, &pos, &count, &kbuf, &ubuf);
+       else
+               err = fpr_set_msa(target, &pos, &count, &kbuf, &ubuf);
+       if (err)
+               return err;
 
-       BUILD_BUG_ON(sizeof(fpr_val) != sizeof(elf_fpreg_t));
-       for (i = 0; i < NUM_FPU_REGS && count >= sizeof(elf_fpreg_t); i++) {
+       if (count > 0) {
                err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                        &fpr_val, i * sizeof(elf_fpreg_t),
-                                        (i + 1) * sizeof(elf_fpreg_t));
+                                        &fcr31,
+                                        fcr31_pos, fcr31_pos + sizeof(u32));
                if (err)
                        return err;
-               set_fpr64(&target->thread.fpu.fpr[i], 0, fpr_val);
+
+               ptrace_setfcr31(target, fcr31);
        }
 
-       return 0;
+       return err;
 }
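
From user space, the regset these helpers implement is read with
PTRACE_GETREGSET; a sketch of the layout implied by fcr31_pos above
(32 doubleword slots, then the 32-bit FCSR — 264 bytes in total,
matching sizeof(elf_fpregset_t)):

#include <elf.h>
#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Mirrors the kernel-side layout: general slots, then FCSR. */
struct mips_nt_prfpreg {
	uint64_t fpr[32];
	uint32_t fcr31;
	uint32_t pad;
};

static long read_fp(pid_t pid, struct mips_nt_prfpreg *buf)
{
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(*buf) };

	return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
}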
 
 enum mips_regset {
index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644 (file)
@@ -123,8 +123,8 @@ int puts(const char *s)
        while ((nuline = strchr(s, '\n')) != NULL) {
                if (nuline != s)
                        pdc_iodc_print(s, nuline - s);
-                       pdc_iodc_print("\r\n", 2);
-                       s = nuline + 1;
+               pdc_iodc_print("\r\n", 2);
+               s = nuline + 1;
        }
        if (*s != '\0')
                pdc_iodc_print(s, strlen(s));
index dd5a08aaa4da746ff09b19cf5f93e9d58489ff14..3eb4bfc1fb365478f11c9c87bfb3b3124fab567a 100644 (file)
@@ -12,6 +12,7 @@
    for the semaphore.  */
 
 #define __PA_LDCW_ALIGNMENT    16
+#define __PA_LDCW_ALIGN_ORDER  4
 #define __ldcw_align(a) ({                                     \
        unsigned long __ret = (unsigned long) &(a)->lock[0];    \
        __ret = (__ret + __PA_LDCW_ALIGNMENT - 1)               \
@@ -29,6 +30,7 @@
    ldcd). */
 
 #define __PA_LDCW_ALIGNMENT    4
+#define __PA_LDCW_ALIGN_ORDER  2
 #define __ldcw_align(a) (&(a)->slock)
 #define __LDCW "ldcw,co"
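
__PA_LDCW_ALIGN_ORDER is simply log2(__PA_LDCW_ALIGNMENT), which lets the
assembly hunks below round up and clear the low bits with a single depi;
restated in C (a sketch of what __ldcw_align() already does):

/* Round a up to the next __PA_LDCW_ALIGNMENT boundary; with
 * ALIGNMENT == 1 << ALIGN_ORDER the mask is just the low ORDER bits. */
#define ALIGNMENT	16
#define ALIGN_ORDER	4	/* 16 == 1 << 4 */

static unsigned long ldcw_align(unsigned long a)
{
	return (a + ALIGNMENT - 1) & ~((1UL << ALIGN_ORDER) - 1);
}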
 
index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644 (file)
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER      2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER      3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
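
Worked out with the usual 4 KiB base page (an assumption; PAGE_SIZE is
config-dependent), the two orders above give THREAD_SIZE = 4096 << 2 =
16 KiB with CONFIG_IRQSTACKS and 4096 << 3 = 32 KiB without: once
interrupts run on their own stacks, the per-task stack no longer has to
absorb interrupt frames and can shrink by half.
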
index d8f77358e2ba29746730f34ba652db327bb2f1d5..29b99b8964aa6c3171d7633ab9863014510330f5 100644 (file)
@@ -870,7 +870,7 @@ static void print_parisc_device(struct parisc_device *dev)
        static int count;
 
        print_pa_hwpath(dev, hw_path);
-       printk(KERN_INFO "%d. %s at 0x%p [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
+       printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }",
                ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type,
                dev->id.hversion_rev, dev->id.hversion, dev->id.sversion);
 
index a4fd296c958e8e14f13a913aca50510b11eb49b7..e95207c0565eb12308e12d48c45bd5309ae6e2ae 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
+#include <asm/ldcw.h>
 #include <asm/thread_info.h>
 
 #include <linux/linkage.h>
 #endif
 
        .import         pa_tlb_lock,data
+       .macro  load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+       load32  PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+       depi    0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+       load32  PA(pa_tlb_lock), \reg
+#endif
+       .endm
 
        /* space_to_prot macro creates a prot id from a space id */
 
        .macro          tlb_lock        spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
        cmpib,COND(=),n 0,\spc,2f
-       load32          PA(pa_tlb_lock),\tmp
+       load_pa_tlb_lock \tmp
 1:     LDCW            0(\tmp),\tmp1
        cmpib,COND(=)   0,\tmp1,1b
        nop
        /* Release pa_tlb_lock lock. */
        .macro          tlb_unlock1     spc,tmp
 #ifdef CONFIG_SMP
-       load32          PA(pa_tlb_lock),\tmp
+       load_pa_tlb_lock \tmp
        tlb_unlock0     \spc,\tmp
 #endif
        .endm
@@ -878,9 +887,6 @@ ENTRY_CFI(syscall_exit_rfi)
        STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-       /* NOTE: Need to enable interrupts incase we schedule. */
-       ssm     PSW_SM_I, %r0
-
        /* check for reschedule */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +913,11 @@ intr_check_sig:
        LDREG   PT_IASQ1(%r16), %r20
        cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+       /* NOTE: We need to enable interrupts if we have to deliver
+        * signals. We used to do this earlier but it caused kernel
+        * stack overflows. */
+       ssm     PSW_SM_I, %r0
+
        copy    %r0, %r25                       /* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29                  /* Reference param save area */
@@ -958,6 +969,10 @@ intr_do_resched:
        cmpib,COND(=)   0, %r20, intr_do_preempt
        nop
 
+       /* NOTE: We need to enable interrupts if we schedule.  We used
+        * to do this earlier but it caused kernel stack overflows. */
+       ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29          /* Reference param save area */
 #endif
index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644 (file)
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
        __INITRODATA
+       .align 4
        .export os_hpmc_size
 os_hpmc_size:
        .word .os_hpmc_end-.os_hpmc
index adf7187f89515ec69b19461f60ce379e552397dc..2d40c4ff3f6918ae9b2e2c6af71e20658a9850e1 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/assembly.h>
 #include <asm/pgtable.h>
 #include <asm/cache.h>
+#include <asm/ldcw.h>
 #include <linux/linkage.h>
 
        .text
@@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
 
        .macro  tlb_lock        la,flags,tmp
 #ifdef CONFIG_SMP
-       ldil            L%pa_tlb_lock,%r1
-       ldo             R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+       load32          pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+       depi            0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+       load32          pa_tlb_lock, \la
+#endif
        rsm             PSW_SM_I,\flags
 1:     LDCW            0(\la),\tmp
        cmpib,<>,n      0,\tmp,3f
index 30f92391a93ef6d5a90970b81921a2133d5e2eb0..cad3e8661cd6cf1895c8b69605dbdad730a4c8e6 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
@@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
        return 1;
 }
 
+/*
+ * Idle thread support
+ *
+ * Detect when running on QEMU with SeaBIOS PDC Firmware and let
+ * QEMU idle the host too.
+ */
+
+int running_on_qemu __read_mostly;
+
+void __cpuidle arch_cpu_idle_dead(void)
+{
+       /* nop on real hardware, qemu will offline CPU. */
+       asm volatile("or %%r31,%%r31,%%r31\n":::);
+}
+
+void __cpuidle arch_cpu_idle(void)
+{
+       local_irq_enable();
+
+       /* nop on real hardware, qemu will idle sleep. */
+       asm volatile("or %%r10,%%r10,%%r10\n":::);
+}
+
+static int __init parisc_idle_init(void)
+{
+       const char *marker;
+
+       /* check QEMU/SeaBIOS marker in PAGE0 */
+       marker = (char *) &PAGE0->pad0;
+       running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
+
+       if (!running_on_qemu)
+               cpu_idle_poll_ctrl(1);
+
+       return 0;
+}
+arch_initcall(parisc_idle_init);
+
 /*
  * Copy architecture-specific thread state
  */
index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644 (file)
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
index 13f7854e0d49cc796d98a438cad64d76916d6bc9..48f41399fc0b8b63acd84d774a09ad2d0aba5086 100644 (file)
@@ -631,11 +631,11 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
 #ifdef CONFIG_DEBUG_KERNEL /* double-sanity-check paranoia */
        printk("virtual kernel memory layout:\n"
-              "    vmalloc : 0x%p - 0x%p   (%4ld MB)\n"
-              "    memory  : 0x%p - 0x%p   (%4ld MB)\n"
-              "      .init : 0x%p - 0x%p   (%4ld kB)\n"
-              "      .data : 0x%p - 0x%p   (%4ld kB)\n"
-              "      .text : 0x%p - 0x%p   (%4ld kB)\n",
+              "    vmalloc : 0x%px - 0x%px   (%4ld MB)\n"
+              "    memory  : 0x%px - 0x%px   (%4ld MB)\n"
+              "      .init : 0x%px - 0x%px   (%4ld kB)\n"
+              "      .data : 0x%px - 0x%px   (%4ld kB)\n"
+              "      .text : 0x%px - 0x%px   (%4ld kB)\n",
 
               (void*)VMALLOC_START, (void*)VMALLOC_END,
               (VMALLOC_END - VMALLOC_START) >> 20,
index a703452d67b62f63b455098e88988fcc0b3776c6..555e22d5e07f9e21c322af718df72bdf2da0dfa4 100644 (file)
@@ -209,5 +209,11 @@ exc_##label##_book3e:
        ori     r3,r3,vector_offset@l;          \
        mtspr   SPRN_IVOR##vector_number,r3;
 
+#define RFI_TO_KERNEL                                                  \
+       rfi
+
+#define RFI_TO_USER                                                    \
+       rfi
+
 #endif /* _ASM_POWERPC_EXCEPTION_64E_H */
 
index b27205297e1d9ca5ca46db9e7189bf9187385803..7197b179c1b150ba819f13a1f7feb6c9a45da1f9 100644 (file)
  */
 #define EX_R3          EX_DAR
 
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT                                                 \
+       RFI_FLUSH_FIXUP_SECTION;                                        \
+       nop;                                                            \
+       nop;                                                            \
+       nop
+
+#define RFI_TO_KERNEL                                                  \
+       rfid
+
+#define RFI_TO_USER                                                    \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL                                          \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define RFI_TO_GUEST                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       rfid;                                                           \
+       b       rfi_flush_fallback
+
+#define HRFI_TO_KERNEL                                                 \
+       hrfid
+
+#define HRFI_TO_USER                                                   \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL                                         \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_GUEST                                                  \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN                                                        \
+       RFI_FLUSH_SLOT;                                                 \
+       hrfid;                                                          \
+       b       hrfi_flush_fallback
+
 #ifdef CONFIG_RELOCATABLE
 #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)                   \
        mfspr   r11,SPRN_##h##SRR0;     /* save SRR0 */                 \
@@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 #define EXCEPTION_PROLOG_PSERIES_1(label, h)                           \
        __EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        mtspr   SPRN_##h##SRR0,r12;                                     \
        mfspr   r12,SPRN_##h##SRR1;     /* and SRR1 */                  \
        mtspr   SPRN_##h##SRR1,r10;                                     \
-       h##rfid;                                                        \
+       h##RFI_TO_KERNEL;                                               \
        b       .       /* prevent speculative execution */
 
 #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)                      \
index 8f88f771cc55ce982c11599b273e584babd38600..1e82eb3caabd19c69289957da188b563d0bcd0d6 100644 (file)
@@ -187,7 +187,20 @@ label##3:                                          \
        FTR_ENTRY_OFFSET label##1b-label##3b;           \
        .popsection;
 
+#define RFI_FLUSH_FIXUP_SECTION                                \
+951:                                                   \
+       .pushsection __rfi_flush_fixup,"a";             \
+       .align 2;                                       \
+952:                                                   \
+       FTR_ENTRY_OFFSET 951b-952b;                     \
+       .popsection;
+
+
 #ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
 void apply_feature_fixups(void);
 void setup_feature_keys(void);
 #endif
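
Each RFI_FLUSH_FIXUP_SECTION entry records a self-relative offset back to
its three-nop RFI_FLUSH_SLOT. A hedged sketch of how do_rfi_flush_fixups()
(declared in the asm/setup.h hunk further below) can consume that table —
the flush instruction encodings are chosen at boot and omitted here:

#include <linux/types.h>
#include <asm/code-patching.h>

extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;

/* Walk the fixup table and overwrite each slot's three nops with the
 * selected flush sequence. */
static void patch_rfi_slots(const unsigned int instrs[3])
{
	long *entry;
	unsigned int *dest;

	for (entry = &__start___rfi_flush_fixup;
	     entry < &__stop___rfi_flush_fixup; entry++) {
		dest = (void *)entry + *entry;
		patch_instruction(dest, instrs[0]);
		patch_instruction(dest + 1, instrs[1]);
		patch_instruction(dest + 2, instrs[2]);
	}
}
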
index a409177be8bdfd5f717af6111700d088fb91e61b..f0461618bf7bee95ffd27874e33501bd41ef80a4 100644 (file)
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
 #define H_MANAGE_TRACE          0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
 #define H_QUERY_INT_STATE       0x1E4
 #define H_POLL_PENDING         0x1D8
 #define H_SIGNAL_SYS_RESET_ALL_OTHERS          -2
 /* >= 0 values are CPU number */
 
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31      (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED   (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30     (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2     (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV     (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY    (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR       (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR  (1ull << 61) // IBM bit 2
+
 /* Flag values used in H_REGISTER_PROC_TBL hcall */
 #define PROC_TABLE_OP_MASK     0x18
 #define PROC_TABLE_DEREG       0x10
@@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
        }
 }
 
+struct h_cpu_char_result {
+       u64 character;
+       u64 behaviour;
+};
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */
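
The "IBM bit" comments above use IBM's big-endian bit numbering, in which bit 0 is the most significant bit of the 64-bit doubleword. A short sketch of the convention (the kernel has an equivalent PPC_BIT() helper; the IBM_BIT name here is illustrative):

    #define IBM_BIT(i)      (1ull << (63 - (i)))    /* IBM bit 0 == MSB */

    /* Cross-checks against the definitions above: */
    _Static_assert(IBM_BIT(0) == (1ull << 63), "SPEC_BAR_ORI31 / FAVOUR_SECURITY");
    _Static_assert(IBM_BIT(4) == (1ull << 59), "L1D_THREAD_PRIV");
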
index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644 (file)
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
index 3892db93b8374e1f1256a6b497d384f3faedc06e..23ac7fc0af23b6cae8f4706665102dd7e2050667 100644 (file)
@@ -232,6 +232,16 @@ struct paca_struct {
        struct sibling_subcore_state *sibling_subcore_state;
 #endif
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       /*
+        * rfi fallback flush must be in its own cacheline to prevent
+        * other paca data leaking into the L1d
+        */
+       u64 exrfi[EX_SIZE] __aligned(0x80);
+       void *rfi_flush_fallback_area;
+       u64 l1d_flush_congruence;
+       u64 l1d_flush_sets;
+#endif
 };
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
index 7f01b22fa6cb0d0895c914423a98cb2361fb2a16..55eddf50d1498020ba7f17216c689ab89b84f3c7 100644 (file)
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
        return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
 
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+       unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+       long rc;
+
+       rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+       if (rc == H_SUCCESS) {
+               p->character = retbuf[0];
+               p->behaviour = retbuf[1];
+       }
+
+       return rc;
+}
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
index cf00ec26303aef1415c7d5fcd7425f2149ac0295..469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4 100644 (file)
@@ -39,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
 #endif /* CONFIG_PPC_PSERIES */
 
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+       L1D_FLUSH_NONE          = 0x1,
+       L1D_FLUSH_FALLBACK      = 0x2,
+       L1D_FLUSH_ORI           = 0x4,
+       L1D_FLUSH_MTTRIG        = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_SETUP_H */
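
One subtlety worth noting: L1D_FLUSH_NONE is itself a flag bit (0x1) rather than zero, so callers test for "no type selected" with equality, not a mask. A hedged sketch of the intended pattern, matching the pseries and powernv callers later in this merge (the predicate name is hypothetical):

    enum l1d_flush_type types = L1D_FLUSH_NONE;

    if (fw_advertises_mttrig)               /* hypothetical predicate */
            types |= L1D_FLUSH_MTTRIG;

    if (types == L1D_FLUSH_NONE)            /* equality, since NONE == 0x1 */
            types = L1D_FLUSH_FALLBACK;

The stale NONE bit left behind by the |= is harmless, since nothing ever masks against L1D_FLUSH_NONE.
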
index 6b958414b4e036ac1e4c97bceb61277ffab65e76..f390d57cf2e1a711335bbd66cf7819e9dcd8442f 100644 (file)
@@ -237,6 +237,11 @@ int main(void)
        OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
        OFFSET(PACA_IN_MCE, paca_struct, in_mce);
        OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+       OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+       OFFSET(PACA_EXRFI, paca_struct, exrfi);
+       OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+       OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
 #endif
        OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
        OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
index 3320bcac71928eeaf85b884076e90767be4d264f..2748584b767da3f5d788c0be27b27662053853e4 100644 (file)
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
 #include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
 
 /*
  * System calls.
@@ -262,13 +267,23 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        ld      r13,GPR13(r1)   /* only restore r13 if returning to usermode */
+       ld      r2,GPR2(r1)
+       ld      r1,GPR1(r1)
+       mtlr    r4
+       mtcr    r5
+       mtspr   SPRN_SRR0,r7
+       mtspr   SPRN_SRR1,r8
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+
+       /* exit to kernel */
 1:     ld      r2,GPR2(r1)
        ld      r1,GPR1(r1)
        mtlr    r4
        mtcr    r5
        mtspr   SPRN_SRR0,r7
        mtspr   SPRN_SRR1,r8
-       RFI
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 .Lsyscall_error:
@@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        mtmsrd  r10, 1
        mtspr   SPRN_SRR0, r11
        mtspr   SPRN_SRR1, r12
-
-       rfid
+       RFI_TO_USER
        b       .       /* prevent speculative execution */
 #endif
 _ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -878,7 +892,7 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
        REST_GPR(13, r1)
-1:
+
        mtspr   SPRN_SRR1,r3
 
        ld      r2,_CCR(r1)
@@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        ld      r3,GPR3(r1)
        ld      r4,GPR4(r1)
        ld      r1,GPR1(r1)
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
 
-       rfid
+1:     mtspr   SPRN_SRR1,r3
+
+       ld      r2,_CCR(r1)
+       mtcrf   0xFF,r2
+       ld      r2,_NIP(r1)
+       mtspr   SPRN_SRR0,r2
+
+       ld      r0,GPR0(r1)
+       ld      r2,GPR2(r1)
+       ld      r3,GPR3(r1)
+       ld      r4,GPR4(r1)
+       ld      r1,GPR1(r1)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 #endif /* CONFIG_PPC_BOOK3E */
@@ -1073,7 +1101,7 @@ __enter_rtas:
        
        mtspr   SPRN_SRR0,r5
        mtspr   SPRN_SRR1,r6
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
 rtas_return_loc:
@@ -1098,7 +1126,7 @@ rtas_return_loc:
 
        mtspr   SPRN_SRR0,r3
        mtspr   SPRN_SRR1,r4
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 _ASM_NOKPROBE_SYMBOL(__enter_rtas)
 _ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)
        LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
        andc    r11,r11,r12
        mtsrr1  r11
-       rfid
+       RFI_TO_KERNEL
 #endif /* CONFIG_PPC_BOOK3E */
 
 1:     /* Return from OF */
index e441b469dc8f61f6381c7c95b086b677004d401b..2dc10bf646b887b51dc2dc28feeb4aac6d9fc00d 100644 (file)
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
        LOAD_HANDLER(r12, machine_check_handle_early)
 1:     mtspr   SPRN_SRR0,r12
        mtspr   SPRN_SRR1,r11
-       rfid
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 2:
        /* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
        li      r3,MSR_ME
        andc    r10,r10,r3              /* Turn off MSR_ME */
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 2:
        /*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
         */
        bl      machine_check_queue_event
        MACHINE_CHECK_HANDLER_WINDUP
-       rfid
+       RFI_TO_USER_OR_KERNEL
 9:
        /* Deliver the machine check to host kernel in V mode. */
        MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
        stw     r9,PACA_EXSLB+EX_CCR(r13)       /* save CR in exc. frame */
        std     r10,PACA_EXSLB+EX_LR(r13)       /* save LR */
 
+       andi.   r9,r11,MSR_PR   // Check for exception from userspace
+       cmpdi   cr4,r9,MSR_PR   // And save the result in CR4 for later
+
        /*
         * Test MSR_RI before calling slb_allocate_realmode, because the
         * MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
        /* All done -- return from exception. */
 
+       bne     cr4,1f          /* returning to kernel */
+
 .machine       push
 .machine       "power4"
        mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
        mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
        mtcrf   0x02,r9         /* I/D indication is in cr6 */
        mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        ld      r11,PACA_EXSLB+EX_R11(r13)
        ld      r12,PACA_EXSLB+EX_R12(r13)
        ld      r13,PACA_EXSLB+EX_R13(r13)
-       rfid
+       RFI_TO_USER
+       b       .       /* prevent speculative execution */
+1:
+.machine       push
+.machine       "power4"
+       mtcrf   0x80,r9
+       mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
+       mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
+       mtcrf   0x02,r9         /* I/D indication is in cr6 */
+       mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
+.machine       pop
+
+       RESTORE_CTR(r9, PACA_EXSLB)
+       RESTORE_PPR_PACA(PACA_EXSLB, r9)
+       mr      r3,r12
+       ld      r9,PACA_EXSLB+EX_R9(r13)
+       ld      r10,PACA_EXSLB+EX_R10(r13)
+       ld      r11,PACA_EXSLB+EX_R11(r13)
+       ld      r12,PACA_EXSLB+EX_R12(r13)
+       ld      r13,PACA_EXSLB+EX_R13(r13)
+       RFI_TO_KERNEL
        b       .       /* prevent speculative execution */
 
+
 2:     std     r3,PACA_EXSLB+EX_DAR(r13)
        mr      r3,r12
        mfspr   r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 8:     std     r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 EXC_COMMON_BEGIN(unrecov_slb)
@@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
        mtspr   SPRN_SRR0,r10 ;                                 \
        ld      r10,PACAKMSR(r13) ;                             \
        mtspr   SPRN_SRR1,r10 ;                                 \
-       rfid ;                                                  \
+       RFI_TO_KERNEL ;                                         \
        b       . ;     /* prevent speculative execution */
 
 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)                              \
        xori    r12,r12,MSR_LE ;                                \
        mtspr   SPRN_SRR1,r12 ;                                 \
        mr      r13,r9 ;                                        \
-       rfid ;          /* return to userspace */               \
+       RFI_TO_USER ;   /* return to userspace */               \
        b       . ;     /* prevent speculative execution */
 #else
 #define SYSCALL_FASTENDIAN_TEST
@@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
        mtcr    r11
        REST_GPR(11, r1)
        ld      r1,GPR1(r1)
-       hrfid
+       HRFI_TO_USER_OR_KERNEL
 
 1:     mtcr    r11
        REST_GPR(11, r1)
@@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
        ld      r11,PACA_EXGEN+EX_R11(r13)
        ld      r12,PACA_EXGEN+EX_R12(r13)
        ld      r13,PACA_EXGEN+EX_R13(r13)
-       HRFID
+       HRFI_TO_UNKNOWN
        b       .
 #endif
 
@@ -1418,10 +1445,94 @@ masked_##_H##interrupt:                                 \
        ld      r10,PACA_EXGEN+EX_R10(r13);             \
        ld      r11,PACA_EXGEN+EX_R11(r13);             \
        /* returns to kernel where r13 must be set up, so don't restore it */ \
-       ##_H##rfid;                                     \
+       ##_H##RFI_TO_KERNEL;                            \
        b       .;                                      \
        MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not zeroed
+       add     r8,r8,r11       // Add 0; this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+       SET_SCRATCH0(r13);
+       GET_PACA(r13);
+       std     r9,PACA_EXRFI+EX_R9(r13)
+       std     r10,PACA_EXRFI+EX_R10(r13)
+       std     r11,PACA_EXRFI+EX_R11(r13)
+       std     r12,PACA_EXRFI+EX_R12(r13)
+       std     r8,PACA_EXRFI+EX_R13(r13)
+       mfctr   r9
+       ld      r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+       ld      r11,PACA_L1D_FLUSH_SETS(r13)
+       ld      r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+       /*
+        * The load addresses are at staggered offsets within cachelines,
+        * which suits some pipelines better (on others it should not
+        * hurt).
+        */
+       addi    r12,r12,8
+       mtctr   r11
+       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+       /* order ld/st prior to dcbt stop all streams with flushing */
+       sync
+1:     li      r8,0
+       .rept   8 /* 8-way set associative */
+       ldx     r11,r10,r8
+       add     r8,r8,r12
+       xor     r11,r11,r11     // Ensure r11 is 0 even if fallback area is not zeroed
+       add     r8,r8,r11       // Add 0; this creates a dependency on the ldx
+       .endr
+       addi    r10,r10,128 /* 128 byte cache line */
+       bdnz    1b
+
+       mtctr   r9
+       ld      r9,PACA_EXRFI+EX_R9(r13)
+       ld      r10,PACA_EXRFI+EX_R10(r13)
+       ld      r11,PACA_EXRFI+EX_R11(r13)
+       ld      r12,PACA_EXRFI+EX_R12(r13)
+       ld      r8,PACA_EXRFI+EX_R13(r13)
+       GET_SCRATCH0(r13);
+       hrfid
+
 /*
  * Real mode exceptions actually use this too, but alternate
  * instruction code patches (which end up in the common .text area)
@@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
        addi    r13, r13, 4
        mtspr   SPRN_SRR0, r13
        GET_SCRATCH0(r13)
-       rfid
+       RFI_TO_KERNEL
        b       .
 
 TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
        addi    r13, r13, 4
        mtspr   SPRN_HSRR0, r13
        GET_SCRATCH0(r13)
-       hrfid
+       HRFI_TO_KERNEL
        b       .
 #endif
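
For readability, a rough C rendering of the displacement-flush loop in the two trampolines above. This is illustrative only: the real flush must run in the assembly trampoline, with prefetch streams stopped and the xor/add pair creating a load-to-load dependency in place of these volatile reads.

    /* area     = PACA_RFI_FLUSH_FALLBACK_AREA
     * way_size = PACA_L1D_FLUSH_CONGRUENCE (bytes per way)
     * nr_lines = PACA_L1D_FLUSH_SETS (cache lines per way)
     * u64 as in <linux/types.h>
     */
    static void fallback_flush(const char *area, u64 way_size, u64 nr_lines)
    {
            u64 stride = way_size + 8; /* next way, staggered 8 bytes per step */
            u64 line, off;
            int way;

            for (line = 0; line < nr_lines; line++) {
                    off = line * 128;                   /* 128-byte cache lines */
                    for (way = 0; way < 8; way++) {     /* 8-way set associative */
                            (void)*(volatile const u64 *)(area + off);
                            off += stride;
                    }
            }
    }
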
 
index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644 (file)
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
        printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
-       printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+       printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
               regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR:  "REG" ", regs->msr);
        print_msr_bits(regs->msr);
index 8956a9856604e72634dd11ef416ec9788660bc37..491be4179ddd989fed6cba1e63963bf47653f4ba 100644 (file)
@@ -801,3 +801,104 @@ static int __init disable_hardlockup_detector(void)
        return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+       pr_info("rfi-flush: disabled on command line.");
+       no_rfi_flush = true;
+       return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see documentation that
+ * says to use nopti, we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+       pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+       handle_no_rfi_flush(NULL);
+       return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+       /*
+        * We don't need to do the flush explicitly; just entering and exiting
+        * the kernel is sufficient, as the RFI exit handlers will do the right
+        * thing.
+        */
+}
+
+void rfi_flush_enable(bool enable)
+{
+       if (rfi_flush == enable)
+               return;
+
+       if (enable) {
+               do_rfi_flush_fixups(enabled_flush_types);
+               on_each_cpu(do_nothing, NULL, 1);
+       } else
+               do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+       rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+       u64 l1d_size, limit;
+       int cpu;
+
+       l1d_size = ppc64_caches.l1d.size;
+       limit = min(safe_stack_limit(), ppc64_rma_size);
+
+       /*
+        * Align to L1d size, and size it at 2x L1d size, to catch possible
+        * hardware prefetch runoff. We don't have a recipe for load patterns to
+        * reliably avoid the prefetcher.
+        */
+       l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+       memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+       for_each_possible_cpu(cpu) {
+               /*
+                * The fallback flush is currently coded for 8-way
+                * associativity. Different associativity is possible, but it
+                * will be treated as 8-way and may not evict the lines as
+                * effectively.
+                *
+                * 128 byte lines are mandatory.
+                */
+               u64 c = l1d_size / 8;
+
+               paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca[cpu].l1d_flush_congruence = c;
+               paca[cpu].l1d_flush_sets = c / 128;
+       }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+       if (types & L1D_FLUSH_FALLBACK) {
+               pr_info("rfi-flush: Using fallback displacement flush\n");
+               init_fallback_flush();
+       }
+
+       if (types & L1D_FLUSH_ORI)
+               pr_info("rfi-flush: Using ori type flush\n");
+
+       if (types & L1D_FLUSH_MTTRIG)
+               pr_info("rfi-flush: Using mttrig type flush\n");
+
+       enabled_flush_types = types;
+
+       if (!no_rfi_flush)
+               rfi_flush_enable(enable);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
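
To make the per-cpu numbers concrete, a worked example assuming a 32 KiB, 8-way L1d with the mandatory 128-byte lines:

    u64 c    = 32768 / 8;   /* 4096: bytes per way ("congruence" stride) */
    u64 sets = c / 128;     /* 32:   cache lines per way to walk */
    /* The trampoline then issues 32 lines x 8 ways = 256 loads over the
     * 64 KiB (2x L1d) fallback area. */
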
index 0494e1566ee2ab9b976cb0d9edb8c195a4490a23..307843d23682a79f0e4d9ae0cdb86a219e0b6e6a 100644 (file)
@@ -132,6 +132,15 @@ SECTIONS
        /* Read-only data */
        RO_DATA(PAGE_SIZE)
 
+#ifdef CONFIG_PPC64
+       . = ALIGN(8);
+       __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+               __start___rfi_flush_fixup = .;
+               *(__rfi_flush_fixup)
+               __stop___rfi_flush_fixup = .;
+       }
+#endif
+
        EXCEPTION_TABLE(0)
 
        NOTES :kernel :notes
index 29ebe2fd58674c59f27803a5426e4d2414f39418..a93d719edc906887b0f4bf412b2b76ac04babfec 100644 (file)
@@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
                gpte->may_read = true;
                gpte->may_write = true;
                gpte->page_size = MMU_PAGE_4K;
+               gpte->wimg = HPTE_R_M;
 
                return 0;
        }
index 966097232d2147bbcd79df41354a5f973fb9b7c1..b73dbc9e797da76bf4305a73972c4af925fbd70d 100644 (file)
@@ -65,11 +65,17 @@ struct kvm_resize_hpt {
        u32 order;
 
        /* These fields protected by kvm->lock */
+
+       /* Possible values and their usage:
+        *  <0     an error occurred during allocation,
+        *  -EBUSY allocation is in progress,
+        *  0      allocation completed successfully.
+        */
        int error;
-       bool prepare_done;
 
-       /* Private to the work thread, until prepare_done is true,
-        * then protected by kvm->resize_hpt_sem */
+       /* Private to the work thread, until error != -EBUSY,
+        * then protected by kvm->lock.
+        */
        struct kvm_hpt_info hpt;
 };
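
A hedged userspace sketch of the prepare/commit flow this error field tracks, assuming the KVM_PPC_RESIZE_HPT_PREPARE/COMMIT ioctls and struct kvm_ppc_resize_hpt from the KVM API documentation, with vm_fd an already-open VM file descriptor:

    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/kvm.h>

    struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = 30 };
    long ret;

    do {
            ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
            if (ret > 0)                    /* estimated milliseconds to ready */
                    usleep(ret * 1000);
    } while (ret > 0);                      /* 0: prepared; <0: allocation failed */

    if (ret == 0)
            ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);

While PREPARE keeps returning the 100 ms estimate, error holds -EBUSY; the work thread replaces it with 0 or a negative errno once the allocation settles.
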
 
@@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
                 * Reset all the reverse-mapping chains for all memslots
                 */
                kvmppc_rmap_reset(kvm);
-               /* Ensure that each vcpu will flush its TLB on next entry. */
-               cpumask_setall(&kvm->arch.need_tlb_flush);
                err = 0;
                goto out;
        }
@@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
        kvmppc_set_hpt(kvm, &info);
 
 out:
+       if (err == 0)
+               /* Ensure that each vcpu will flush its TLB on next entry. */
+               cpumask_setall(&kvm->arch.need_tlb_flush);
+
        mutex_unlock(&kvm->lock);
        return err;
 }
@@ -1413,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
 
 static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
 {
-       BUG_ON(kvm->arch.resize_hpt != resize);
+       if (WARN_ON(!mutex_is_locked(&kvm->lock)))
+               return;
 
        if (!resize)
                return;
 
-       if (resize->hpt.virt)
-               kvmppc_free_hpt(&resize->hpt);
+       if (resize->error != -EBUSY) {
+               if (resize->hpt.virt)
+                       kvmppc_free_hpt(&resize->hpt);
+               kfree(resize);
+       }
 
-       kvm->arch.resize_hpt = NULL;
-       kfree(resize);
+       if (kvm->arch.resize_hpt == resize)
+               kvm->arch.resize_hpt = NULL;
 }
 
 static void resize_hpt_prepare_work(struct work_struct *work)
@@ -1431,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)
                                                     struct kvm_resize_hpt,
                                                     work);
        struct kvm *kvm = resize->kvm;
-       int err;
+       int err = 0;
 
-       resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
-                        resize->order);
-
-       err = resize_hpt_allocate(resize);
+       if (WARN_ON(resize->error != -EBUSY))
+               return;
 
        mutex_lock(&kvm->lock);
 
+       /* Request is still current? */
+       if (kvm->arch.resize_hpt == resize) {
+               /* We may request large allocations here:
+                * do not hold kvm->lock while we sleep for
+                * a potentially long time.
+                */
+               mutex_unlock(&kvm->lock);
+
+               resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+                                resize->order);
+
+               err = resize_hpt_allocate(resize);
+
+               /* -EBUSY is reserved to mean "allocation in progress",
+                * so resize_hpt_allocate() must never return it here.
+                */
+               if (WARN_ON(err == -EBUSY))
+                       err = -EINPROGRESS;
+
+               mutex_lock(&kvm->lock);
+               /* It is possible that kvm->arch.resize_hpt != resize
+                * after we grab kvm->lock again.
+                */
+       }
+
        resize->error = err;
-       resize->prepare_done = true;
+
+       if (kvm->arch.resize_hpt != resize)
+               resize_hpt_release(kvm, resize);
 
        mutex_unlock(&kvm->lock);
 }
@@ -1466,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 
        if (resize) {
                if (resize->order == shift) {
-                       /* Suitable resize in progress */
-                       if (resize->prepare_done) {
-                               ret = resize->error;
-                               if (ret != 0)
-                                       resize_hpt_release(kvm, resize);
-                       } else {
+                       /* Suitable resize in progress? */
+                       ret = resize->error;
+                       if (ret == -EBUSY)
                                ret = 100; /* estimated time in ms */
-                       }
+                       else if (ret)
+                               resize_hpt_release(kvm, resize);
 
                        goto out;
                }
@@ -1493,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
                ret = -ENOMEM;
                goto out;
        }
+
+       resize->error = -EBUSY;
        resize->order = shift;
        resize->kvm = kvm;
        INIT_WORK(&resize->work, resize_hpt_prepare_work);
@@ -1547,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
        if (!resize || (resize->order != shift))
                goto out;
 
-       ret = -EBUSY;
-       if (!resize->prepare_done)
-               goto out;
-
        ret = resize->error;
-       if (ret != 0)
+       if (ret)
                goto out;
 
        ret = resize_hpt_rehash(resize);
-       if (ret != 0)
+       if (ret)
                goto out;
 
        resize_hpt_pivot(resize);
index 2659844784b817d7ca77e6e95ecb9418f430e62d..9c61f736c75b2d0761ec4f9d2e385df75b6dc882 100644 (file)
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
        mtmsrd  r0,1            /* clear RI in MSR */
        mtsrr0  r5
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 kvmppc_call_hv_entry:
 BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtmsrd  r6, 1                   /* Clear RI in MSR */
        mtsrr0  r8
        mtsrr1  r7
-       RFI
+       RFI_TO_KERNEL
 
        /* Virtual-mode return */
 .Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
        ld      r0, VCPU_GPR(R0)(r4)
        ld      r4, VCPU_GPR(R4)(r4)
-
-       hrfid
+       HRFI_TO_GUEST
        b       .
 
 secondary_too_late:
@@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
        ld      r4, PACAKMSR(r13)
        mtspr   SPRN_SRR0, r3
        mtspr   SPRN_SRR1, r4
-       rfid
+       RFI_TO_KERNEL
 9:     addi    r3, r1, STACK_FRAME_OVERHEAD
        bl      kvmppc_bad_interrupt
        b       9b
index d0dc8624198f8e4663800c4ca603aea98d4ba128..7deaeeb14b9358d8a4e6f1107cd42bba0605a264 100644 (file)
@@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M   _PAGE_COHERENT
 #endif
 
 static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
                pte.page_size = MMU_PAGE_64K;
+               pte.wimg = HPTE_R_M;
        }
 
        switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
index 42a4b237df5f5731a4f6964feafc4db2835ffdee..34a5adeff0840662e8eae4553b008069bfd9426d 100644 (file)
@@ -46,6 +46,9 @@
 
 #define FUNC(name)             name
 
+#define RFI_TO_KERNEL  RFI
+#define RFI_TO_GUEST   RFI
+
 .macro INTERRUPT_TRAMPOLINE intno
 
 .global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
        GET_SCRATCH0(r13)
 
        /* And get back into the code */
-       RFI
+       RFI_TO_KERNEL
 #endif
 
 /*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
        ori     r5, r5, MSR_EE
        mtsrr0  r7
        mtsrr1  r6
-       RFI
+       RFI_TO_KERNEL
 
 #include "book3s_segment.S"
index 2a2b96d5399917398312dacb7c7b2846483a7fd4..93a180ceefad03343d1f9064420ee00ca54973d7 100644 (file)
@@ -156,7 +156,7 @@ no_dcbz32_on:
        PPC_LL  r9, SVCPU_R9(r3)
        PPC_LL  r3, (SVCPU_R3)(r3)
 
-       RFI
+       RFI_TO_GUEST
 kvmppc_handler_trampoline_enter_end:
 
 
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
        cmpwi   r12, BOOK3S_INTERRUPT_DOORBELL
        beqa    BOOK3S_INTERRUPT_DOORBELL
 
-       RFI
+       RFI_TO_KERNEL
 kvmppc_handler_trampoline_exit_end:
index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644 (file)
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
        /* Return the per-cpu state for state saving/migration */
        return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+              (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        /*
         * Restore P and Q. If the interrupt was pending, we
-        * force both P and Q, which will trigger a resend.
+        * force Q and !P, which will trigger a resend.
         *
         * That means that a guest that had both an interrupt
         * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
         * is perfectly fine as coalescing interrupts that haven't
         * been presented yet is always allowed.
         */
-       if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+       if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                state->old_p = true;
        if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                state->old_q = true;
index 41cf5ae273cf74a2747d13b9da2274b8eb2054cb..a95ea007d654d5db2b78d811a4ef21a750a95609 100644 (file)
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
        }
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+
+       start = PTRRELOC(&__start___rfi_flush_fixup);
+       end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+       instrs[0] = 0x60000000; /* nop */
+       instrs[1] = 0x60000000; /* nop */
+       instrs[2] = 0x60000000; /* nop */
+
+       if (types & L1D_FLUSH_FALLBACK)
+               /* b .+16 to fallback flush */
+               instrs[0] = 0x48000010;
+
+       i = 0;
+       if (types & L1D_FLUSH_ORI) {
+               instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+               instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush */
+       }
+
+       if (types & L1D_FLUSH_MTTRIG)
+               instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+       for (i = 0; start < end; start++, i++) {
+               dest = (void *)start + *start;
+
+               pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+               patch_instruction(dest, instrs[0]);
+               patch_instruction(dest + 1, instrs[1]);
+               patch_instruction(dest + 2, instrs[2]);
+       }
+
+       printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
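
The magic instruction words above can be cross-checked against the Power ISA encodings. A small standalone sketch, assuming only the primary opcode layouts:

    #include <assert.h>

    /* I-form branch: opcode 18, LI = signed byte displacement (AA = LK = 0) */
    #define PPC_B(li)           (18u << 26 | ((li) & 0x03fffffcu))
    /* D-form ori: opcode 24; "ori 0,0,0" is the canonical nop */
    #define PPC_ORI(ra, rs, ui) (24u << 26 | (rs) << 21 | (ra) << 16 | (ui))
    /* XFX-form mtspr: opcode 31, extended opcode 467, 10-bit SPR split 5:5 */
    #define PPC_MTSPR(spr, rs)  (31u << 26 | (rs) << 21 | ((spr) & 0x1f) << 16 | \
                                 (((spr) >> 5) & 0x1f) << 11 | 467u << 1)

    int main(void)
    {
            assert(PPC_ORI(0, 0, 0)    == 0x60000000);  /* nop */
            assert(PPC_B(16)           == 0x48000010);  /* b .+16 */
            assert(PPC_ORI(31, 31, 0)  == 0x63ff0000);  /* speculation barrier */
            assert(PPC_ORI(30, 30, 0)  == 0x63de0000);  /* L1d flush */
            assert(PPC_MTSPR(882, 0)   == 0x7c12dba6);  /* mtspr TRIG2,r0 */
            return 0;
    }
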
+
 void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 {
        long *start, *end;
index 4797d08581cec347b6cf34a316c1ef585209653c..6e1e3903538065becbab2ad47febad597f5d6d49 100644 (file)
@@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
        return __bad_area(regs, address, SEGV_MAPERR);
 }
 
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+       return __bad_area(regs, address, SEGV_ACCERR);
+}
+
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
                     unsigned int fault)
 {
@@ -490,7 +495,7 @@ retry:
 
 good_area:
        if (unlikely(access_error(is_write, is_exec, vma)))
-               return bad_area(regs, address);
+               return bad_access(regs, address);
 
        /*
         * If for any reason at all we couldn't handle the fault,
index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644 (file)
@@ -763,7 +763,8 @@ emit_clear:
                        func = (u8 *) __bpf_call_base + imm;
 
                        /* Save skb pointer if we need to re-cache skb data */
-                       if (bpf_helper_changes_pkt_data(func))
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func))
                                PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
                        PPC_MR(b2p[BPF_REG_0], 3);
 
                        /* refresh skb cache */
-                       if (bpf_helper_changes_pkt_data(func)) {
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func)) {
                                /* reload skb pointer to r3 */
                                PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
                                bpf_jit_emit_skb_loads(image, ctx);
index 1538129663658381b6b1a425dcbf582b1ed09531..fce545774d50afc6093c28ad2f4127c24ed5331c 100644 (file)
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
        int ret;
        __u64 target;
 
-       if (is_kernel_addr(addr))
-               return branch_target((unsigned int *)addr);
+       if (is_kernel_addr(addr)) {
+               if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+                       return 0;
+
+               return branch_target(&instr);
+       }
 
        /* Userspace: need copy instruction here then translate it */
        pagefault_disable();
index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644 (file)
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
        if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
                return 0;
 
+       /*
+        * Check whether nest_imc is registered. We could end up here if the
+        * cpuhotplug callback registration fails. i.e, callback invokes the
+        * offline path for all successfully registered nodes. At this stage,
+        * nest_imc pmu will not be registered and we should return here.
+        *
+        * We return with a zero since this is not an offline failure. And
+        * cpuhp_setup_state() returns the actual failure reason to the caller,
+        * which in turn will call the cleanup routine.
+        */
+       if (!nest_pmus)
+               return 0;
+
        /*
         * Now that this cpu is one of the designated,
         * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                if (nest_pmus == 1) {
                        cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
                        kfree(nest_imc_refc);
+                       kfree(per_nest_pmu_arr);
                }
 
                if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
        kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
        kfree(pmu_ptr);
-       kfree(per_nest_pmu_arr);
        return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
                        ret = nest_pmu_cpumask_init();
                        if (ret) {
                                mutex_unlock(&nest_init_lock);
+                               kfree(nest_imc_refc);
+                               kfree(per_nest_pmu_arr);
                                goto err_free;
                        }
                }
index 1edfbc1e40f4387b30dd5c0a9491935460feaf92..4fb21e17504aad72295a9e1cdffe54c5547379bd 100644 (file)
 #include <asm/kexec.h>
 #include <asm/smp.h>
 #include <asm/tm.h>
+#include <asm/setup.h>
 
 #include "powernv.h"
 
+static void pnv_setup_rfi_flush(void)
+{
+       struct device_node *np, *fw_features;
+       enum l1d_flush_type type;
+       int enable;
+
+       /* Default to fallback in case fw-features are not available */
+       type = L1D_FLUSH_FALLBACK;
+       enable = 1;
+
+       np = of_find_node_by_name(NULL, "ibm,opal");
+       fw_features = of_get_child_by_name(np, "fw-features");
+       of_node_put(np);
+
+       if (fw_features) {
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_MTTRIG;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+               if (np && of_property_read_bool(np, "enabled"))
+                       type = L1D_FLUSH_ORI;
+
+               of_node_put(np);
+
+               /* Enable unless firmware says NOT to */
+               enable = 2;
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+
+               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+               if (np && of_property_read_bool(np, "disabled"))
+                       enable--;
+
+               of_node_put(np);
+               of_node_put(fw_features);
+       }
+
+       setup_rfi_flush(type, enable > 0);
+}
+
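The enable counter above is a compact way of AND-ing two firmware opt-outs: the flush stays on unless firmware marks both needs-l1d-flush-* properties disabled. An equivalent boolean rendering (the node variable names are illustrative):

    bool hv_disabled = np_hv && of_property_read_bool(np_hv, "disabled");
    bool pr_disabled = np_pr && of_property_read_bool(np_pr, "disabled");
    bool do_flush    = !(hv_disabled && pr_disabled);   /* == (enable > 0) */
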
 static void __init pnv_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
+       pnv_setup_rfi_flush();
+
        /* Initialize SMP */
        pnv_smp_init();
 
index 6e35780c5962f25144b0181414c03a017840478f..a0b20c03f078cc41b9ad126fbf2603a4ffb68463 100644 (file)
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
 
 static CLASS_ATTR_RW(dlpar);
 
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
 {
+       if (pseries_hp_wq)
+               return 0;
+
        pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
-                                       WQ_UNBOUND, 1);
+                       WQ_UNBOUND, 1);
+
+       return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+       int rc;
+
+       rc = dlpar_workqueue_init();
+       if (rc)
+               return rc;
+
        return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
 
index 4470a3194311e06e040c624b8b1491eb0e071298..1ae1d9f4dbe99935130971cdc390d8f581ad788e 100644 (file)
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
        return CMO_PageSize;
 }
 
+int dlpar_workqueue_init(void);
+
 #endif /* _PSERIES_PSERIES_H */
index 4923ffe230cf926565c0ed4c9a339a822b073c15..81d8614e73790b1923a3c8cfd336a2531fee76ce 100644 (file)
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
        /* Hotplug Events */
        np = of_find_node_by_path("/event-sources/hot-plug-events");
        if (np != NULL) {
-               request_event_sources_irqs(np, ras_hotplug_interrupt,
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
                                           "RAS_HOTPLUG");
                of_node_put(np);
        }
index a8531e01265842f39e759bc59aa67f3afd461486..ae4f596273b51a836e5d27307f81bfe6438590bf 100644 (file)
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
        of_pci_check_probe_only();
 }
 
+static void pseries_setup_rfi_flush(void)
+{
+       struct h_cpu_char_result result;
+       enum l1d_flush_type types;
+       bool enable;
+       long rc;
+
+       /* Enable by default */
+       enable = true;
+
+       rc = plpar_get_cpu_characteristics(&result);
+       if (rc == H_SUCCESS) {
+               types = L1D_FLUSH_NONE;
+
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+                       types |= L1D_FLUSH_MTTRIG;
+               if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+                       types |= L1D_FLUSH_ORI;
+
+               /* Use fallback if nothing set in hcall */
+               if (types == L1D_FLUSH_NONE)
+                       types = L1D_FLUSH_FALLBACK;
+
+               if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+                       enable = false;
+       } else {
+               /* Default to fallback in case the hcall is not available */
+               types = L1D_FLUSH_FALLBACK;
+       }
+
+       setup_rfi_flush(types, enable);
+}
+
 static void __init pSeries_setup_arch(void)
 {
        set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
 
        fwnmi_init();
 
+       pseries_setup_rfi_flush();
+
        /* By default, only probe PCI (can be overridden by rtas_pci) */
        pci_add_flags(PCI_PROBE_ONLY);
 
index 44cbf4c12ea137562f02758aab4e2e604f54e96f..df95102e732cb466911b32632fd53e3f3369bf54 100644 (file)
@@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)
 }
 
 static struct lock_class_key fsl_msi_irq_class;
+static struct lock_class_key fsl_msi_irq_request_class;
 
 static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
                               int offset, int irq_index)
@@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,
                dev_err(&dev->dev, "No memory for MSI cascade data\n");
                return -ENOMEM;
        }
-       irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class);
+       irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class,
+                             &fsl_msi_irq_request_class);
        cascade_data->index = offset;
        cascade_data->msi_data = msi;
        cascade_data->virq = virt_msir;
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..47dacf06c679f3eff22c0a6ab0f619d9b4019ab5 100644 (file)
@@ -0,0 +1,75 @@
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NETLINK_DIAG=y
+CONFIG_DEVTMPFS=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_NETDEVICES=y
+CONFIG_VIRTIO_NET=y
+CONFIG_MACB=y
+CONFIG_E1000E=y
+CONFIG_R8169=y
+CONFIG_MICROSEMI_PHY=y
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
+CONFIG_USB_XHCI_PLATFORM=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_UAS=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_RAS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
+# CONFIG_RCU_TRACE is not set
+CONFIG_CRYPTO_USER_API_HASH=y
index 0d64bc9f4f91562872b36231e5b593887498cde2..3c7a2c97e377a5af2923c8e7d96cf04e57e66892 100644 (file)
 #include <linux/const.h>
 
 /* Status register flags */
-#define SR_IE   _AC(0x00000002, UL) /* Interrupt Enable */
-#define SR_PIE  _AC(0x00000020, UL) /* Previous IE */
-#define SR_PS   _AC(0x00000100, UL) /* Previously Supervisor */
-#define SR_SUM  _AC(0x00040000, UL) /* Supervisor may access User Memory */
+#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_SPIE        _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_SUM _AC(0x00040000, UL) /* Supervisor may access User Memory */
 
 #define SR_FS           _AC(0x00006000, UL) /* Floating-point Status */
 #define SR_FS_OFF       _AC(0x00000000, UL)
index a82ce599b639813c9ed3ad697f217cb09a6538e1..b269451e7e85577c76cb718283806fdfb6f76532 100644 (file)
@@ -21,8 +21,6 @@
 
 #include <linux/types.h>
 
-#ifdef CONFIG_MMU
-
 extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 /*
@@ -36,8 +34,6 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
 
 extern void iounmap(volatile void __iomem *addr);
 
-#endif /* CONFIG_MMU */
-
 /* Generic IO read/write.  These perform native-endian accesses. */
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
index 6fdc860d7f84fd69648af547ff8984946eac93fd..07a3c6d5706ff8fd8f34acbe6c5832fc6e638676 100644 (file)
@@ -27,25 +27,25 @@ static inline unsigned long arch_local_save_flags(void)
 /* unconditionally enable interrupts */
 static inline void arch_local_irq_enable(void)
 {
-       csr_set(sstatus, SR_IE);
+       csr_set(sstatus, SR_SIE);
 }
 
 /* unconditionally disable interrupts */
 static inline void arch_local_irq_disable(void)
 {
-       csr_clear(sstatus, SR_IE);
+       csr_clear(sstatus, SR_SIE);
 }
 
 /* get status and disable interrupts */
 static inline unsigned long arch_local_irq_save(void)
 {
-       return csr_read_clear(sstatus, SR_IE);
+       return csr_read_clear(sstatus, SR_SIE);
 }
 
 /* test flags */
 static inline int arch_irqs_disabled_flags(unsigned long flags)
 {
-       return !(flags & SR_IE);
+       return !(flags & SR_SIE);
 }
 
 /* test hardware interrupt enable bit */
@@ -57,7 +57,7 @@ static inline int arch_irqs_disabled(void)
 /* set interrupt enabled status */
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-       csr_set(sstatus, flags & SR_IE);
+       csr_set(sstatus, flags & SR_SIE);
 }
 
 #endif /* _ASM_RISCV_IRQFLAGS_H */
index 2cbd92ed1629c00df42b8ceaffd2250b6de7413a..16301966d65b6fd8a54614d12f0815866d19d948 100644 (file)
@@ -20,8 +20,6 @@
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_MMU
-
 /* Page Upper Directory not used in RISC-V */
 #include <asm-generic/pgtable-nopud.h>
 #include <asm/page.h>
@@ -413,8 +411,6 @@ static inline void pgtable_cache_init(void)
        /* No page table caches to initialize */
 }
 
-#endif /* CONFIG_MMU */
-
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
index 93b8956e25e46714a5bd789ed0ea15ebb2a687f2..2c5df945d43c9abfdfd197a61d8c92ce20e48133 100644 (file)
@@ -66,7 +66,7 @@ struct pt_regs {
 #define REG_FMT "%08lx"
 #endif
 
-#define user_mode(regs) (((regs)->sstatus & SR_PS) == 0)
+#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
 
 
 /* Helpers for working with the instruction pointer */
index 715b0f10af580811dfca3ba067819e07fe1e7374..7b9c24ebdf5293ac73b8155b2c394fc9244a712d 100644 (file)
@@ -15,8 +15,6 @@
 #ifndef _ASM_RISCV_TLBFLUSH_H
 #define _ASM_RISCV_TLBFLUSH_H
 
-#ifdef CONFIG_MMU
-
 #include <linux/mm_types.h>
 
 /*
@@ -64,6 +62,4 @@ static inline void flush_tlb_kernel_range(unsigned long start,
        flush_tlb_all();
 }
 
-#endif /* CONFIG_MMU */
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
index 27b90d64814b8d0422d0316f8e765b6e97081a47..14b0b22fb57875dfe920685265a79da317c517e0 100644 (file)
@@ -127,7 +127,6 @@ extern int fixup_exception(struct pt_regs *state);
  * call.
  */
 
-#ifdef CONFIG_MMU
 #define __get_user_asm(insn, x, ptr, err)                      \
 do {                                                           \
        uintptr_t __tmp;                                        \
@@ -153,13 +152,11 @@ do {                                                              \
        __disable_user_access();                                \
        (x) = __x;                                              \
 } while (0)
-#endif /* CONFIG_MMU */
 
 #ifdef CONFIG_64BIT
 #define __get_user_8(x, ptr, err) \
        __get_user_asm("ld", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __get_user_8(x, ptr, err)                              \
 do {                                                           \
        u32 __user *__ptr = (u32 __user *)(ptr);                \
@@ -193,7 +190,6 @@ do {                                                                \
        (x) = (__typeof__(x))((__typeof__((x)-(x)))(            \
                (((u64)__hi << 32) | __lo)));                   \
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -267,8 +263,6 @@ do {                                                                \
                ((x) = 0, -EFAULT);                             \
 })
 
-
-#ifdef CONFIG_MMU
 #define __put_user_asm(insn, x, ptr, err)                      \
 do {                                                           \
        uintptr_t __tmp;                                        \
@@ -292,14 +286,11 @@ do {                                                              \
                : "rJ" (__x), "i" (-EFAULT));                   \
        __disable_user_access();                                \
 } while (0)
-#endif /* CONFIG_MMU */
-
 
 #ifdef CONFIG_64BIT
 #define __put_user_8(x, ptr, err) \
        __put_user_asm("sd", x, ptr, err)
 #else /* !CONFIG_64BIT */
-#ifdef CONFIG_MMU
 #define __put_user_8(x, ptr, err)                              \
 do {                                                           \
        u32 __user *__ptr = (u32 __user *)(ptr);                \
@@ -329,7 +320,6 @@ do {                                                                \
                : "rJ" (__x), "rJ" (__x >> 32), "i" (-EFAULT)); \
        __disable_user_access();                                \
 } while (0)
-#endif /* CONFIG_MMU */
 #endif /* CONFIG_64BIT */
 
 
@@ -438,7 +428,6 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
  * will set "err" to -EFAULT, while successful accesses return the previous
  * value.
  */
-#ifdef CONFIG_MMU
 #define __cmpxchg_user(ptr, old, new, err, size, lrb, scb)     \
 ({                                                             \
        __typeof__(ptr) __ptr = (ptr);                          \
@@ -508,6 +497,5 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
        (err) = __err;                                          \
        __ret;                                                  \
 })
-#endif /* CONFIG_MMU */
 
 #endif /* _ASM_RISCV_UACCESS_H */
index 9f250ed007cd816e523898a10f32ec6814da6892..2f704a5c4196e30fd2a9a9c3607eeeed3d6f019f 100644 (file)
@@ -14,3 +14,4 @@
 #define __ARCH_HAVE_MMU
 #define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
+#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h
deleted file mode 100644 (file)
index a2ccf18..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2017 SiFive
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
-#define _ASM_RISCV_VDSO_SYSCALLS_H
-
-#ifdef CONFIG_SMP
-
-/* These syscalls are only used by the vDSO and are not in the uapi. */
-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
-
-#endif
-
-#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h
new file mode 100644 (file)
index 0000000..818655b
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2017 SiFive
+ */
+
+#ifndef _ASM__UAPI__SYSCALLS_H
+#define _ASM__UAPI__SYSCALLS_H
+
+/*
+ * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
+ * having a direct 'fence.i' instruction available to userspace (which we
+ * can't trap!), that's not actually viable when running on Linux because the
+ * kernel might schedule a process on another hart.  There is no way for
+ * userspace to handle this without invoking the kernel (as it doesn't know the
+ * thread->hart mappings), so we've defined a RISC-V specific system call to
+ * flush the instruction cache.
+ *
+ * __NR_riscv_flush_icache is defined to flush the instruction cache over an
+ * address range, with the flush applying to either all threads or just the
+ * caller.  We don't currently do anything with the address range; it's just
+ * there for forwards compatibility.
+ */
+#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
+__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+#endif
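
A minimal userspace sketch of invoking this syscall. The numeric fallback assumes __NR_arch_specific_syscall is 244, as in asm-generic unistd.h, giving 259 here; the flags semantics follow the comment above:

    #include <unistd.h>
    #include <sys/syscall.h>

    #ifndef __NR_riscv_flush_icache
    #define __NR_riscv_flush_icache (244 + 15)  /* assumption, see lead-in */
    #endif

    static void flush_icache_range(void *start, void *end)
    {
            /* flags == 0: the flush applies to all threads of the process */
            syscall(__NR_riscv_flush_icache, start, end, 0UL);
    }
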
index 20ee86f782a93b19779236f39a40daa3d9563ac5..7404ec222406290ed03c671a3b4463c61aa49c12 100644 (file)
@@ -196,7 +196,7 @@ handle_syscall:
        addi s2, s2, 0x4
        REG_S s2, PT_SEPC(sp)
        /* System calls run with interrupts enabled */
-       csrs sstatus, SR_IE
+       csrs sstatus, SR_SIE
        /* Trace syscalls, but only if requested by the user. */
        REG_L t0, TASK_TI_FLAGS(tp)
        andi t0, t0, _TIF_SYSCALL_TRACE
@@ -224,8 +224,8 @@ ret_from_syscall:
 
 ret_from_exception:
        REG_L s0, PT_SSTATUS(sp)
-       csrc sstatus, SR_IE
-       andi s0, s0, SR_PS
+       csrc sstatus, SR_SIE
+       andi s0, s0, SR_SPP
        bnez s0, restore_all
 
 resume_userspace:
@@ -255,7 +255,7 @@ work_pending:
        bnez s1, work_resched
 work_notifysig:
        /* Handle pending signals and notify-resume requests */
-       csrs sstatus, SR_IE /* Enable interrupts for do_notify_resume() */
+       csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
        move a0, sp /* pt_regs */
        move a1, s0 /* current_thread_info->flags */
        tail do_notify_resume
index 0d90dcc1fbd36559823e48db2dc15ac4319b063d..d74d4adf2d54f94a4dff39c14d7953b8c0fdbab8 100644 (file)
@@ -76,7 +76,7 @@ void show_regs(struct pt_regs *regs)
 void start_thread(struct pt_regs *regs, unsigned long pc,
        unsigned long sp)
 {
-       regs->sstatus = SR_PIE /* User mode, irqs on */ | SR_FS_INITIAL;
+       regs->sstatus = SR_SPIE /* User mode, irqs on */ | SR_FS_INITIAL;
        regs->sepc = pc;
        regs->sp = sp;
        set_fs(USER_DS);
@@ -110,7 +110,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                const register unsigned long gp __asm__ ("gp");
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->gp = gp;
-               childregs->sstatus = SR_PS | SR_PIE; /* Supervisor, irqs on */
+               childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
 
                p->thread.ra = (unsigned long)ret_from_kernel_thread;
                p->thread.s[0] = usp; /* fn */
index a5bd6401f95e6988a376406f02b7e39cb7c1352e..ade52b903a43f2b27546fdd216b87b7057bc95d4 100644 (file)
@@ -23,5 +23,4 @@
 void *sys_call_table[__NR_syscalls] = {
        [0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 };
index b0fbad74e873ea9fff553424b975502b589d7097..023e4d4aef588e139fafcbc9e3fb1ddef0482575 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/linkage.h>
 #include <asm/unistd.h>
-#include <asm/vdso-syscalls.h>
 
        .text
 /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
index df2ca3c65048f9347781b05309c9552eaa7d0844..0713f3c67ab4242a2e54430331044724bf57f289 100644 (file)
@@ -63,7 +63,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
                goto vmalloc_fault;
 
        /* Enable interrupts if they were enabled in the parent context. */
-       if (likely(regs->sstatus & SR_PIE))
+       if (likely(regs->sstatus & SR_SPIE))
                local_irq_enable();
 
        /*
index ec8b68e97d3cd4755074463e467a82474471e7c6..2c93cbbcd15e35a389078643874bf424160f25db 100644 (file)
@@ -792,11 +792,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 
        if (kvm->arch.use_cmma) {
                /*
-                * Get the last slot. They should be sorted by base_gfn, so the
-                * last slot is also the one at the end of the address space.
-                * We have verified above that at least one slot is present.
+                * Get the first slot. They are reverse sorted by base_gfn, so
+                * the first slot is also the one at the end of the address
+                * space. We have verified above that at least one slot is
+                * present.
                 */
-               ms = slots->memslots + slots->used_slots - 1;
+               ms = slots->memslots;
                /* round up so we only use full longs */
                ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
                /* allocate enough bytes to store all the bits */
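To make the assumed ordering explicit, a hypothetical debug helper (not in the patch) checking the invariant the new code relies on, namely that memslots are kept sorted by descending base_gfn so memslots[0] ends the guest address space:

        static bool memslots_reverse_sorted(struct kvm_memslots *slots)
        {
                int i;

                for (i = 0; i < slots->used_slots - 1; i++)
                        if (slots->memslots[i].base_gfn <
                            slots->memslots[i + 1].base_gfn)
                                return false;
                return true;
        }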
index 572496c688cc0c647bd220310bfdc6e9635d4723..0714bfa56da0f54cae66b26bd5329b18beb77b0b 100644 (file)
@@ -1006,7 +1006,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
                cbrlo[entries] = gfn << PAGE_SHIFT;
        }
 
-       if (orc) {
+       if (orc && gfn < ms->bitmap_size) {
                /* increment only if we are really flipping the bit to 1 */
                if (!test_and_set_bit(gfn, ms->pgste_bitmap))
                        atomic64_inc(&ms->dirty_pages);
index cae5a1e16cbd2d9ac5cc7b2fd1f67443919b8f80..c4f8039a35e8dda0bc20999b7db089e3ab09b613 100644 (file)
@@ -89,11 +89,11 @@ EXPORT_SYMBOL(enable_sacf_uaccess);
 
 void disable_sacf_uaccess(mm_segment_t old_fs)
 {
+       current->thread.mm_segment = old_fs;
        if (old_fs == USER_DS && test_facility(27)) {
                __ctl_load(S390_lowcore.user_asce, 1, 1);
                clear_cpu_flag(CIF_ASCE_PRIMARY);
        }
-       current->thread.mm_segment = old_fs;
 }
 EXPORT_SYMBOL(disable_sacf_uaccess);
 
index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644 (file)
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL   8       /* code uses literals */
 #define SEEN_FUNC      16      /* calls C functions */
 #define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_SKB_CHANGE        64      /* code changes skb data */
-#define SEEN_REG_AX    128     /* code uses constant blinding */
+#define SEEN_REG_AX    64      /* code uses constant blinding */
 #define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB)
+       if (jit->seen & SEEN_SKB) {
                emit_load_skb_data_hlen(jit);
-       if (jit->seen & SEEN_SKB_CHANGE)
                /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
                EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
                              STK_OFF_SKBP);
+       }
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                EMIT2(0x0d00, REG_14, REG_W1);
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if (bpf_helper_changes_pkt_data((void *)func)) {
-                       jit->seen |= SEEN_SKB_CHANGE;
+               if ((jit->seen & SEEN_SKB) &&
+                   bpf_helper_changes_pkt_data((void *)func)) {
                        /* lg %b1,ST_OFF_SKBP(%r15) */
                        EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
                                      REG_15, STK_OFF_SKBP);
index f7aa5a77827ec17d893d59834a0d33082bb8fd82..2d15d84c20ede64297e0c906a55e3de166f53e3d 100644 (file)
@@ -181,6 +181,9 @@ out_unlock:
 static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
                           size_t size, int flags)
 {
+       unsigned long irqflags;
+       int ret;
+
        /*
         * With zdev->tlb_refresh == 0, rpcit is not required to establish new
         * translations when previously invalid translation-table entries are
@@ -196,8 +199,22 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
                        return 0;
        }
 
-       return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
-                                 PAGE_ALIGN(size));
+       ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+                                PAGE_ALIGN(size));
+       if (ret == -ENOMEM && !s390_iommu_strict) {
+               /* enable the hypervisor to free some resources */
+               if (zpci_refresh_global(zdev))
+                       goto out;
+
+               spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
+               bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+                             zdev->lazy_bitmap, zdev->iommu_pages);
+               bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+               spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
+               ret = 0;
+       }
+out:
+       return ret;
 }
 
 static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
index 19bcb3b45a70fc12fa426d636fd4482c570c6654..f069929e82114004adea2cc0bfc3abc15bdf23da 100644 (file)
@@ -89,6 +89,9 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
        if (cc)
                zpci_err_insn(cc, status, addr, range);
 
+       if (cc == 1 && (status == 4 || status == 16))
+               return -ENOMEM;
+
        return (cc) ? -EIO : 0;
 }
 
index 77c35350ee774d4fa26ed5bc57b70d9d03c7bcab..412326d59e6fcebd51469163f6476b88753aaa4c 100644 (file)
@@ -9,6 +9,7 @@
  */
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/sh_eth.h>
 #include <mach-se/mach/se.h>
 #include <mach-se/mach/mrshpc.h>
 #include <asm/machvec.h>
@@ -115,13 +116,23 @@ static struct platform_device heartbeat_device = {
 #if defined(CONFIG_CPU_SUBTYPE_SH7710) ||\
        defined(CONFIG_CPU_SUBTYPE_SH7712)
 /* SH771X Ethernet driver */
+static struct sh_eth_plat_data sh_eth_plat = {
+       .phy = PHY_ID,
+       .phy_interface = PHY_INTERFACE_MODE_MII,
+};
+
 static struct resource sh_eth0_resources[] = {
        [0] = {
                .start = SH_ETH0_BASE,
-               .end = SH_ETH0_BASE + 0x1B8,
+               .end = SH_ETH0_BASE + 0x1B8 - 1,
                .flags = IORESOURCE_MEM,
        },
        [1] = {
+               .start = SH_TSU_BASE,
+               .end = SH_TSU_BASE + 0x200 - 1,
+               .flags = IORESOURCE_MEM,
+       },
+       [2] = {
                .start = SH_ETH0_IRQ,
                .end = SH_ETH0_IRQ,
                .flags = IORESOURCE_IRQ,
@@ -132,7 +143,7 @@ static struct platform_device sh_eth0_device = {
        .name = "sh771x-ether",
        .id = 0,
        .dev = {
-               .platform_data = PHY_ID,
+               .platform_data = &sh_eth_plat,
        },
        .num_resources = ARRAY_SIZE(sh_eth0_resources),
        .resource = sh_eth0_resources,
@@ -141,10 +152,15 @@ static struct platform_device sh_eth0_device = {
 static struct resource sh_eth1_resources[] = {
        [0] = {
                .start = SH_ETH1_BASE,
-               .end = SH_ETH1_BASE + 0x1B8,
+               .end = SH_ETH1_BASE + 0x1B8 - 1,
                .flags = IORESOURCE_MEM,
        },
        [1] = {
+               .start = SH_TSU_BASE,
+               .end = SH_TSU_BASE + 0x200 - 1,
+               .flags = IORESOURCE_MEM,
+       },
+       [2] = {
                .start = SH_ETH1_IRQ,
                .end = SH_ETH1_IRQ,
                .flags = IORESOURCE_IRQ,
@@ -155,7 +171,7 @@ static struct platform_device sh_eth1_device = {
        .name = "sh771x-ether",
        .id = 1,
        .dev = {
-               .platform_data = PHY_ID,
+               .platform_data = &sh_eth_plat,
        },
        .num_resources = ARRAY_SIZE(sh_eth1_resources),
        .resource = sh_eth1_resources,
index 4246ef9b07a346ed590be0cd3651f5a370ee700a..aa83fe1ff0b124c002e375e2ce5a83c7c853e29c 100644 (file)
 /* Base address */
 #define SH_ETH0_BASE 0xA7000000
 #define SH_ETH1_BASE 0xA7000400
+#define SH_TSU_BASE  0xA7000800
 /* PHY ID */
 #if defined(CONFIG_CPU_SUBTYPE_SH7710)
 # define PHY_ID 0x00
index e5547b22cd1832c3aea507b3dfc694da90568222..0ddbbb03182232fe199f5222248617c72a2f23b7 100644 (file)
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
        .previous
 
 ENTRY(__arch_hweight64)
-       sethi   %hi(__sw_hweight16), %g1
-       jmpl    %g1 + %lo(__sw_hweight16), %g0
+       sethi   %hi(__sw_hweight64), %g1
+       jmpl    %g1 + %lo(__sw_hweight64), %g0
         nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644 (file)
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
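This and the following %p-to-%px conversions track printk pointer hashing, new in 4.15: plain %p now prints a hashed value so kernel addresses don't leak into logs, while %px prints the raw pointer where the address itself is the point of the message. A two-line illustration (not from the patch):

        printk(KERN_INFO "ip %p\n",  ptr);      /* hashed value, safe for logs */
        printk(KERN_INFO "ip %px\n", ptr);      /* raw address, use with care  */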
index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644 (file)
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644 (file)
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-               if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-                       load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       load_skb_regs(ctx, L7);
                break;
        }
 
index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644 (file)
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        uml_setup_stubs(mm);
+       return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644 (file)
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644 (file)
@@ -81,9 +81,10 @@ do { \
        } \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
index 5f25b39f04d4305dbec925a85dd43a4ed421386d..c4ac6043ebb0fc44e7016d6124ce7ce780adabd3 100644 (file)
@@ -298,7 +298,6 @@ void abort(void)
        /* if that doesn't kill us, halt */
        panic("Oops failed to kill thread");
 }
-EXPORT_SYMBOL(abort);
 
 void __init trap_init(void)
 {
index 8eed3f94bfc774de5e3f344590f8889a999dea9c..20da391b5f329885e26b30e0351d73d571d28fc1 100644 (file)
@@ -55,7 +55,6 @@ config X86
        select ARCH_HAS_GCOV_PROFILE_ALL
        select ARCH_HAS_KCOV                    if X86_64
        select ARCH_HAS_PMEM_API                if X86_64
-       # Causing hangs/crashes, see the commit that added this change for details.
        select ARCH_HAS_REFCOUNT
        select ARCH_HAS_UACCESS_FLUSHCACHE      if X86_64
        select ARCH_HAS_SET_MEMORY
@@ -89,6 +88,7 @@ config X86
        select GENERIC_CLOCKEVENTS_MIN_ADJUST
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
+       select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
        select GENERIC_FIND_FIRST_BIT
        select GENERIC_IOMAP
@@ -429,6 +429,19 @@ config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
 
+config RETPOLINE
+       bool "Avoid speculative indirect branches in kernel"
+       default y
+       help
+         Compile kernel with the retpoline compiler options to guard against
+         kernel-to-user data leaks by avoiding speculative indirect
+         branches. Requires a compiler with -mindirect-branch=thunk-extern
+         support for full protection. The kernel may run slower.
+
+         Without compiler support, at least indirect branches in assembler
+         code are eliminated. Since this includes the syscall entry path,
+         it is not entirely pointless.
+
 config INTEL_RDT
        bool "Intel Resource Director Technology support"
        default n
@@ -926,7 +939,8 @@ config MAXSMP
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
        range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
        range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
        default "1" if !SMP
        default "8192" if MAXSMP
index 3e73bc255e4eb3edda965a6bc493e0e6ab8de354..fad55160dcb94a28e60d537d3d69d471a1e10e2e 100644 (file)
@@ -230,6 +230,14 @@ KBUILD_CFLAGS += -Wno-sign-compare
 #
 KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+    RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+    ifneq ($(RETPOLINE_CFLAGS),)
+        KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+    endif
+endif
+
 archscripts: scripts_basic
        $(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
index d5364ca2e3f9290d0ba36606b7e25369f872c144..b5e5e02f8cde7fa9123dc56981c3a3a98f45843c 100644 (file)
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
index c9e8499fbfe75c0a98d0223247fa6cb0746198bb..6a10d52a41452d941c22e197fa50b4ebd93ba0d5 100644 (file)
@@ -80,39 +80,43 @@ genfdimage288() {
        mcopy $FBZIMAGE w:linux
 }
 
-genisoimage() {
+geniso() {
        tmp_dir=`dirname $FIMAGE`/isoimage
        rm -rf $tmp_dir
        mkdir $tmp_dir
-       for i in lib lib64 share end ; do
+       for i in lib lib64 share ; do
                for j in syslinux ISOLINUX ; do
                        if [ -f /usr/$i/$j/isolinux.bin ] ; then
                                isolinux=/usr/$i/$j/isolinux.bin
-                               cp $isolinux $tmp_dir
                        fi
                done
                for j in syslinux syslinux/modules/bios ; do
                        if [ -f /usr/$i/$j/ldlinux.c32 ]; then
                                ldlinux=/usr/$i/$j/ldlinux.c32
-                               cp $ldlinux $tmp_dir
                        fi
                done
                if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
                        break
                fi
-               if [ $i = end -a -z "$isolinux" ] ; then
-                       echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
-                       exit 1
-               fi
        done
+       if [ -z "$isolinux" ] ; then
+               echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
+               exit 1
+       fi
+       if [ -z "$ldlinux" ] ; then
+               echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
+               exit 1
+       fi
+       cp $isolinux $tmp_dir
+       cp $ldlinux $tmp_dir
        cp $FBZIMAGE $tmp_dir/linux
        echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
        if [ -f "$FDINITRD" ] ; then
                cp "$FDINITRD" $tmp_dir/initrd.img
        fi
-       mkisofs -J -r -input-charset=utf-8 -quiet -o $FIMAGE -b isolinux.bin \
-               -c boot.cat -no-emul-boot -boot-load-size 4 -boot-info-table \
-               $tmp_dir
+       genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
+               -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
+               -boot-info-table $tmp_dir
        isohybrid $FIMAGE 2>/dev/null || true
        rm -rf $tmp_dir
 }
@@ -121,6 +125,6 @@ case $1 in
        bzdisk)     genbzdisk;;
        fdimage144) genfdimage144;;
        fdimage288) genfdimage288;;
-       isoimage)   genisoimage;;
+       isoimage)   geniso;;
        *)          echo 'Unknown image format'; exit 1;
 esac
index 16627fec80b26b211ba3c4b50c3850c134e2bd62..3d09e3aca18dad33ba0c27c3673e49dcbfac0ce8 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/linkage.h>
 #include <asm/inst.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
        pxor INC, STATE4
        movdqu IV, 0x30(OUTP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x00(OUTP), INC
        pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
        _aesni_gf128mul_x_ble()
        movups IV, (IVP)
 
-       call *%r11
+       CALL_NOSPEC %r11
 
        movdqu 0x40(OUTP), INC
        pxor INC, STATE1
index f7c495e2863cb0daecb7b023e19e393cfc32c587..a14af6eb09cb074a1dcf42d0adaaf1c388ceb30b 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
        vpxor 14 * 16(%rax), %xmm15, %xmm14;
        vpxor 15 * 16(%rax), %xmm15, %xmm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 16), %rsp;
 
index eee5b3982cfd3c6838a9a34534409df8935eba17..b66bbfa62f50d7c05ddb29b57a322979f64a6999 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
        vpxor 14 * 32(%rax), %ymm15, %ymm14;
        vpxor 15 * 32(%rax), %ymm15, %ymm15;
 
-       call *%r9;
+       CALL_NOSPEC %r9;
 
        addq $(16 * 32), %rsp;
 
index 7a7de27c6f415206f2c3b8e5a8a43d4468bc7aaa..d9b734d0c8cc78ecdc3293ca117546eb538a12fc 100644 (file)
@@ -45,6 +45,7 @@
 
 #include <asm/inst.h>
 #include <linux/linkage.h>
+#include <asm/nospec-branch.h>
 
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
@@ -172,7 +173,7 @@ continue_block:
        movzxw  (bufp, %rax, 2), len
        lea     crc_array(%rip), bufp
        lea     (bufp, len, 1), bufp
-       jmp     *bufp
+       JMP_NOSPEC bufp
 
        ################################################################
        ## 2a) PROCESS FULL BLOCKS:
index 3fd8bc560faece4cb6253e87a4c092965a73e7af..3f48f695d5e6ac6546a009c734fcac517564b24d 100644 (file)
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
 
@@ -187,6 +192,148 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_USER_PGTABLE_BIT           PAGE_SHIFT
+#define PTI_USER_PGTABLE_MASK          (1 << PTI_USER_PGTABLE_BIT)
+#define PTI_USER_PCID_BIT              X86_CR3_PTI_PCID_USER_BIT
+#define PTI_USER_PCID_MASK             (1 << PTI_USER_PCID_BIT)
+#define PTI_USER_PGTABLE_AND_PCID_MASK  (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
+
+.macro SET_NOFLUSH_BIT reg:req
+       bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+       ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+       /* Clear PCID and the PAGE_TABLE_ISOLATION bit, point CR3 at kernel pagetables: */
+       andq    $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+       ADJUST_KERNEL_CR3 \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+       PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * Test if the ASID needs a flush.
+        */
+       movq    \scratch_reg, \scratch_reg2
+       andq    $(0x7FF), \scratch_reg          /* mask ASID */
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       /* Flush needed, clear the bit */
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       movq    \scratch_reg2, \scratch_reg
+       jmp     .Lwrcr3_pcid_\@
+
+.Lnoflush_\@:
+       movq    \scratch_reg2, \scratch_reg
+       SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_pcid_\@:
+       /* Flip the ASID to the user version */
+       orq     $(PTI_USER_PCID_MASK), \scratch_reg
+
+.Lwrcr3_\@:
+       /* Flip the PGD to the user version */
+       orq     $(PTI_USER_PGTABLE_MASK), \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK        scratch_reg:req
+       pushq   %rax
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+       popq    %rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+       movq    %cr3, \scratch_reg
+       movq    \scratch_reg, \save_reg
+       /*
+        * Test the user pagetable bit. If set, then the user page tables
+        * are active. If clear, CR3 already has the kernel page table
+        * active.
+        */
+       bt      $PTI_USER_PGTABLE_BIT, \scratch_reg
+       jnc     .Ldone_\@
+
+       ADJUST_KERNEL_CR3 \scratch_reg
+       movq    \scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * KERNEL pages can always resume with NOFLUSH as we do
+        * explicit flushes.
+        */
+       bt      $PTI_USER_PGTABLE_BIT, \save_reg
+       jnc     .Lnoflush_\@
+
+       /*
+        * Check if there's a pending flush for the user ASID we're
+        * about to set.
+        */
+       movq    \save_reg, \scratch_reg
+       andq    $(0x7FF), \scratch_reg
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jmp     .Lwrcr3_\@
+
+.Lnoflush_\@:
+       SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+       /*
+        * The CR3 write could be avoided when not changing its value,
+        * but would require a CR3 read *and* a scratch register.
+        */
+       movq    \save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
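Reduced to C, the CR3 manipulation in the macros above is plain bit arithmetic; a sketch (illustrative, assuming 4K pages so the PGD-select bit is 12, and assuming the user-PCID bit is 11):

        #define PTI_PGTABLE_MASK        (1UL << 12)     /* PTI_USER_PGTABLE_BIT */
        #define PTI_PCID_MASK           (1UL << 11)     /* PTI_USER_PCID_BIT */

        static inline unsigned long pti_kernel_cr3(unsigned long cr3)
        {
                /* clear the user-PGD and user-PCID bits */
                return cr3 & ~(PTI_PGTABLE_MASK | PTI_PCID_MASK);
        }

        static inline unsigned long pti_user_cr3(unsigned long cr3)
        {
                /* select the user half of the 8k PGD pair and the user ASID */
                return cr3 | PTI_PGTABLE_MASK | PTI_PCID_MASK;
        }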
index 4838037f97f6edffda62b5b045c837fcc29402f0..a1f28a54f23a42e85436ed607293bea14486391c 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
        .section .entry.text, "ax"
 
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
 
        /* kernel thread */
 1:     movl    %edi, %eax
-       call    *%ebx
+       CALL_NOSPEC %ebx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
        movl    %ecx, %es
        TRACE_IRQS_OFF
        movl    %esp, %eax                      # pt_regs pointer
-       call    *%edi
+       CALL_NOSPEC %edi
        jmp     ret_from_exception
 END(common_exception)
 
@@ -941,9 +942,10 @@ ENTRY(debug)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Ldebug_from_sysenter_stack
 
        TRACE_IRQS_OFF
@@ -984,9 +986,10 @@ ENTRY(nmi)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Lnmi_from_sysenter_stack
 
        /* Not on SYSENTER stack. */
index f81d50d7ceacdefa06d61482687937096c68421c..4f8e1d35a97ca7071a11f33f07e2b87c2eac7acf 100644 (file)
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
 #include <asm/pgtable_types.h>
 #include <asm/export.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +142,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+       .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the (remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH    CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+       swapgs
+
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+
+       /* Note: using %rsp as a scratch reg. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+       /* Start building the simulated IRET frame. */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   RSP_SCRATCH                     /* pt_regs->sp */
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
+
+       /*
+        * x86 lacks a near absolute jump, and we can't jump to the real
+        * entry text with a relative jump.  We could push the target
+        * address and then use retq, but this destroys the pipeline on
+        * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+        * spill RDI and restore it in a second-stage trampoline.
+        */
+       pushq   %rdi
+       movq    $entry_SYSCALL_64_stage2, %rdi
+       JMP_NOSPEC %rdi
+END(entry_SYSCALL_64_trampoline)
+
+       .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+       UNWIND_HINT_EMPTY
+       popq    %rdi
+       jmp     entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
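In C terms, the CPU_ENTRY_AREA macro above is an offsetof() subtraction; a sketch (the entry_trampoline field name comes from the asm-offsets used above, the helper itself is hypothetical):

        #include <asm/cpu_entry_area.h>         /* struct cpu_entry_area */
        #include <linux/stddef.h>               /* offsetof */

        /* The trampoline text is mapped inside each CPU's cpu_entry_area,
         * so its runtime address minus the field offset is the area base. */
        static inline struct cpu_entry_area *
        trampoline_to_area(void *trampoline_rip)
        {
                return (struct cpu_entry_area *)((char *)trampoline_rip -
                        offsetof(struct cpu_entry_area, entry_trampoline));
        }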
 ENTRY(entry_SYSCALL_64)
        UNWIND_HINT_EMPTY
        /*
@@ -149,6 +212,10 @@ ENTRY(entry_SYSCALL_64)
         */
 
        swapgs
+       /*
+        * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
+        * is not required to switch CR3.
+        */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -204,7 +271,12 @@ entry_SYSCALL_64_fastpath:
         * It might end up jumping to the slow path.  If it jumps, RAX
         * and all argument registers are clobbered.
         */
+#ifdef CONFIG_RETPOLINE
+       movq    sys_call_table(, %rax, 8), %rax
+       call    __x86_indirect_thunk_rax
+#else
        call    *sys_call_table(, %rax, 8)
+#endif
 .Lentry_SYSCALL_64_after_fastpath_call:
 
        movq    %rax, RAX(%rsp)
@@ -330,8 +402,25 @@ syscall_return_via_sysret:
        popq    %rsi    /* skip rcx */
        popq    %rdx
        popq    %rsi
+
+       /*
+        * Now all regs are restored except RSP and RDI.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
        popq    %rdi
-       movq    RSP-ORIG_RAX(%rsp), %rsp
+       popq    %rsp
        USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -359,7 +448,7 @@ ENTRY(stub_ptregs_64)
        jmp     entry_SYSCALL64_slow_path
 
 1:
-       jmp     *%rax                           /* Called from C */
+       JMP_NOSPEC %rax                         /* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -438,7 +527,7 @@ ENTRY(ret_from_fork)
 1:
        /* kernel thread */
        movq    %r12, %rdi
-       call    *%rbx
+       CALL_NOSPEC %rbx
        /*
         * A kernel thread is allowed to return here after successfully
         * calling do_execve().  Exit to userspace to complete the execve()
@@ -466,12 +555,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-       pushfq
-       testl $X86_EFLAGS_IF, (%rsp)
+       pushq %rax
+       SAVE_FLAGS(CLBR_RAX)
+       testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
 .Lokay_\@:
-       addq $8, %rsp
+       popq %rax
 #endif
 .endm
 
@@ -563,6 +653,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
        cld
+
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jz      1f
+       SWAPGS
+       call    switch_to_thread_stack
+1:
+
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
@@ -572,12 +669,8 @@ END(irq_entries_start)
        jz      1f
 
        /*
-        * IRQ from user mode.  Switch to kernel gsbase and inform context
-        * tracking that we're in kernel mode.
-        */
-       SWAPGS
-
-       /*
+        * IRQ from user mode.
+        *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
         * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +723,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       SWAPGS
        POP_EXTRA_REGS
-       POP_C_REGS
-       addq    $8, %rsp        /* skip regs->orig_ax */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+
+       /*
+        * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+       pushq   5*8(%rdi)       /* RSP */
+       pushq   4*8(%rdi)       /* EFLAGS */
+       pushq   3*8(%rdi)       /* CS */
+       pushq   2*8(%rdi)       /* RIP */
+
+       /* Push user RDI on the trampoline stack. */
+       pushq   (%rdi)
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
        INTERRUPT_RETURN
 
 
@@ -713,7 +839,9 @@ native_irq_return_ldt:
         */
 
        pushq   %rdi                            /* Stash user RDI */
-       SWAPGS
+       SWAPGS                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
+
        movq    PER_CPU_VAR(espfix_waddr), %rdi
        movq    %rax, (0*8)(%rdi)               /* user RAX */
        movq    (1*8)(%rsp), %rax               /* user RIP */
@@ -729,7 +857,6 @@ native_irq_return_ldt:
        /* Now RAX == RSP. */
 
        andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
-       popq    %rdi                            /* Restore user RDI */
 
        /*
         * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +867,11 @@ native_irq_return_ldt:
         * still points to an RO alias of the ESPFIX stack.
         */
        orq     PER_CPU_VAR(espfix_stack), %rax
-       SWAPGS
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+       SWAPGS                                  /* to user GS */
+       popq    %rdi                            /* Restore user RDI */
+
        movq    %rax, %rsp
        UNWIND_HINT_IRET_REGS offset=8
 
@@ -829,7 +960,35 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+
+       pushq   %rdi
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
+       ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
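For readability, a hypothetical C view of the incoming stack that switch_to_thread_stack copies, matching the pushq offsets above (one 8-byte slot per field, lowest address first):

        struct switch_stack_frame {
                unsigned long rdi;      /*   (%rdi): spilled scratch register */
                unsigned long ret;      /*  8(%rdi): return into the idtentry */
                unsigned long orig_ax;  /* 16(%rdi) */
                unsigned long ip;       /* 24(%rdi): hardware IRET frame ...  */
                unsigned long cs;       /* 32(%rdi) */
                unsigned long eflags;   /* 40(%rdi) */
                unsigned long sp;       /* 48(%rdi) */
                unsigned long ss;       /* 56(%rdi) */
        };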
@@ -848,11 +1007,12 @@ ENTRY(\sym)
 
        ALLOC_PT_GPREGS_ON_STACK
 
-       .if \paranoid
-       .if \paranoid == 1
+       .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
-       jnz     1f
+       jnz     .Lfrom_usermode_switch_stack_\@
        .endif
+
+       .if \paranoid
        call    paranoid_entry
        .else
        call    error_entry
@@ -894,20 +1054,15 @@ ENTRY(\sym)
        jmp     error_exit
        .endif
 
-       .if \paranoid == 1
+       .if \paranoid < 2
        /*
-        * Paranoid entry from userspace.  Switch stacks and treat it
+        * Entry from userspace.  Switch stacks and treat it
         * as a normal entry.  This means that paranoid handlers
         * run in real process context if user_mode(regs).
         */
-1:
+.Lfrom_usermode_switch_stack_\@:
        call    error_entry
 
-
-       movq    %rsp, %rdi                      /* pt_regs pointer */
-       call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-
        movq    %rsp, %rdi                      /* pt_regs pointer */
 
        .if \has_error_code
@@ -1119,7 +1274,11 @@ ENTRY(paranoid_entry)
        js      1f                              /* negative -> in kernel */
        SWAPGS
        xorl    %ebx, %ebx
-1:     ret
+
+1:
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+       ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1300,7 @@ ENTRY(paranoid_exit)
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     .Lparanoid_exit_no_swapgs
        TRACE_IRQS_IRETQ
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
        SWAPGS_UNSAFE_STACK
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1328,18 @@ ENTRY(error_entry)
         * from user mode due to an IRET fault.
         */
        SWAPGS
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+
        /*
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1376,7 @@ ENTRY(error_entry)
         * .Lgs_change's error handler with kernel gsbase.
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
        jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1386,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
        /*
-        * We came from an IRET to user mode, so we have user gsbase.
-        * Switch to kernel gsbase:
+        * We came from an IRET to user mode, so we have user
+        * gsbase and CR3.  Switch to kernel gsbase and CR3:
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
        /*
         * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1422,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *     %r14: Used to save/restore the CR3 of the interrupted context
+ *           when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
        UNWIND_HINT_IRET_REGS
@@ -1313,6 +1489,7 @@ ENTRY(nmi)
 
        swapgs
        cld
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
        movq    %rsp, %rdx
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
        UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1742,8 @@ end_repeat_nmi:
        movq    $-1, %rsi
        call    do_nmi
 
+       RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     nmi_restore
 nmi_swapgs:
index 568e130d932cd2a7d44393e5fc52408cffe64f34..98d5358e4041a7e144ec566f7db19ff054cedbcc 100644 (file)
  */
 ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
-       SWAPGS_UNSAFE_STACK
+       SWAPGS
+
+       /* We are about to clobber %rsp anyway; clobbering here is OK */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /*
@@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
        /* Interrupts are off on entry. */
        swapgs
 
-       /* Stash user ESP and switch to the kernel stack. */
+       /* Stash user ESP */
        movl    %esp, %r8d
+
+       /* Use %rsp as scratch reg. User ESP is stashed in r8 */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+       /* Switch to the kernel stack */
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /* Construct struct pt_regs on stack */
@@ -256,10 +265,22 @@ sysret32_from_system_call:
         * when the system call started, which is already known to user
         * code.  We zero R8-R10 to avoid info leaks.
          */
+       movq    RSP-ORIG_RAX(%rsp), %rsp
+
+       /*
+        * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+        * on the process stack, which is not mapped to userspace and
+        * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+        * switch until after the last reference to the process
+        * stack.
+        *
+        * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+        */
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
        xorq    %r8, %r8
        xorq    %r9, %r9
        xorq    %r10, %r10
-       movq    RSP-ORIG_RAX(%rsp), %rsp
        swapgs
        sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +327,11 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
-       /* Construct struct pt_regs on stack (iret frame is already on stack) */
        pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* switch_to_thread_stack() expects orig_ax to be pushed */
+       call    switch_to_thread_stack
+
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..577fa8adb785baf5ea1c993a2bbc88adf43fbbcc 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
+       /* This should be unreachable in NATIVE mode. */
+       if (WARN_ON(vsyscall_mode == NATIVE))
+               return false;
+
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsigned long)VSYSCALL_ADDR);
index 141e07b0621689e745582599c009f4af1d053c6c..24ffa1e88cf948ecbe9839f6d956c69a12ecd4f0 100644 (file)
@@ -582,6 +582,24 @@ static __init int bts_init(void)
        if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
                return -ENODEV;
 
+       if (boot_cpu_has(X86_FEATURE_PTI)) {
+               /*
+                * BTS hardware writes through a virtual memory map; we must
+                * either use the kernel physical map or the user mapping of
+                * the AUX buffer.
+                *
+                * However, since this driver supports per-CPU and per-task
+                * inherit, we cannot use the user mapping, since it will not
+                * be available if we're not running the owning process.
+                *
+                * With PTI we can't use the kernel map either, because it's
+                * not there when we run userspace.
+                *
+                * For now, disable this driver when using PTI.
+                */
+               return -ENODEV;
+       }
+
        bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
                                  PERF_PMU_CAP_EXCLUSIVE;
        bts_pmu.task_ctx_nr     = perf_sw_context;
index 09c26a4f139c125e000675689ebc983acd8ab91a..731153a4681e73f761dea8c0c15ce6757b89860e 100644 (file)
@@ -3847,6 +3847,8 @@ static struct attribute *intel_pmu_attrs[] = {
 
 __init int intel_pmu_init(void)
 {
+       struct attribute **extra_attr = NULL;
+       struct attribute **to_free = NULL;
        union cpuid10_edx edx;
        union cpuid10_eax eax;
        union cpuid10_ebx ebx;
@@ -3854,7 +3856,6 @@ __init int intel_pmu_init(void)
        unsigned int unused;
        struct extra_reg *er;
        int version, i;
-       struct attribute **extra_attr = NULL;
        char *name;
 
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -4294,6 +4295,7 @@ __init int intel_pmu_init(void)
                extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
                        hsw_format_attr : nhm_format_attr;
                extra_attr = merge_attr(extra_attr, skl_format_attr);
+               to_free = extra_attr;
                x86_pmu.cpu_events = get_hsw_events_attrs();
                intel_pmu_pebs_data_source_skl(
                        boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
@@ -4401,6 +4403,7 @@ __init int intel_pmu_init(void)
                pr_cont("full-width counters, ");
        }
 
+       kfree(to_free);
        return 0;
 }
 
index 3674a4b6f8bd0c5f12223b8f5c16067a933450df..8156e47da7ba4c5a31e9f9436b27a96cfb7da4c3 100644 (file)
@@ -3,16 +3,19 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
+#include <asm/tlbflush.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE                24
 
-#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE                PAGE_SIZE
 
 /*
@@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       unsigned long start = (unsigned long)cea;
+       phys_addr_t pa;
+       size_t msz = 0;
+
+       pa = virt_to_phys(addr);
+
+       preempt_disable();
+       for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, pa, prot);
+
+       /*
+        * This is a cross-CPU update of the cpu_entry_area; we must shoot down
+        * all TLB entries for it.
+        */
+       flush_tlb_kernel_range(start, start + size);
+       preempt_enable();
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+       unsigned long start = (unsigned long)cea;
+       size_t msz = 0;
+
+       preempt_disable();
+       for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+               cea_set_pte(cea, 0, PAGE_NONE);
+
+       flush_tlb_kernel_range(start, start + size);
+       preempt_enable();
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+       unsigned int order = get_order(size);
        int node = cpu_to_node(cpu);
-       int max;
-       void *buffer, *ibuffer;
+       struct page *page;
+
+       page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+       return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+       if (buffer)
+               free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       size_t bsiz = x86_pmu.pebs_buffer_size;
+       int max, node = cpu_to_node(cpu);
+       void *buffer, *ibuffer, *cea;
 
        if (!x86_pmu.pebs)
                return 0;
 
-       buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+       buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
        if (unlikely(!buffer))
                return -ENOMEM;
 
@@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
        if (x86_pmu.intel_cap.pebs_format < 2) {
                ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
                if (!ibuffer) {
-                       kfree(buffer);
+                       dsfree_pages(buffer, bsiz);
                        return -ENOMEM;
                }
                per_cpu(insn_buffer, cpu) = ibuffer;
        }
-
-       max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-       ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+       hwev->ds_pebs_vaddr = buffer;
+       /* Update the cpu entry area mapping */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+       ds->pebs_buffer_base = (unsigned long) cea;
+       ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
        ds->pebs_index = ds->pebs_buffer_base;
-       ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-               max * x86_pmu.pebs_record_size;
-
+       max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+       ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
        return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *cea;
 
        if (!ds || !x86_pmu.pebs)
                return;
@@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
        kfree(per_cpu(insn_buffer, cpu));
        per_cpu(insn_buffer, cpu) = NULL;
 
-       kfree((void *)(unsigned long)ds->pebs_buffer_base);
+       /* Clear the fixmap */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+       ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
        ds->pebs_buffer_base = 0;
+       dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+       hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-       int node = cpu_to_node(cpu);
-       int max, thresh;
-       void *buffer;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *buffer, *cea;
+       int max;
 
        if (!x86_pmu.bts)
                return 0;
 
-       buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+       buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
        if (unlikely(!buffer)) {
                WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
                return -ENOMEM;
        }
-
-       max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-       thresh = max / 16;
-
-       ds->bts_buffer_base = (u64)(unsigned long)buffer;
+       hwev->ds_bts_vaddr = buffer;
+       /* Update the fixmap */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+       ds->bts_buffer_base = (unsigned long) cea;
+       ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
        ds->bts_index = ds->bts_buffer_base;
-       ds->bts_absolute_maximum = ds->bts_buffer_base +
-               max * BTS_RECORD_SIZE;
-       ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-               thresh * BTS_RECORD_SIZE;
-
+       max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+       ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+       ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
        return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+       struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+       struct debug_store *ds = hwev->ds;
+       void *cea;
 
        if (!ds || !x86_pmu.bts)
                return;
 
-       kfree((void *)(unsigned long)ds->bts_buffer_base);
+       /* Clear the cpu entry area mapping */
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+       ds_clear_cea(cea, BTS_BUFFER_SIZE);
        ds->bts_buffer_base = 0;
+       dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+       hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-       int node = cpu_to_node(cpu);
-       struct debug_store *ds;
-
-       ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-       if (unlikely(!ds))
-               return -ENOMEM;
+       struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+       memset(ds, 0, sizeof(*ds));
        per_cpu(cpu_hw_events, cpu).ds = ds;
-
        return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-       struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-       if (!ds)
-               return;
-
        per_cpu(cpu_hw_events, cpu).ds = NULL;
-       kfree(ds);
 }
 
 void release_ds_buffers(void)
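
The hunks above route all debug-store, PEBS and BTS accesses through the cpu entry area alias rather than the kernel linear address. The bodies of dsalloc_pages()/ds_update_cea() fall outside this excerpt; a minimal sketch of what they are assumed to do, consistent with their call sites here (illustrative, not the patch's code):

static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
{
        unsigned int order = get_order(size);
        int node = cpu_to_node(cpu);
        struct page *page;

        /* Plain node-local pages; they get aliased into the cea afterwards */
        page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
        return page ? page_address(page) : NULL;
}

static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
{
        size_t msz = 0;

        /* Alias every backing page at the matching cpu entry area address */
        for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE, addr += PAGE_SIZE)
                cea_set_pte(cea, virt_to_phys(addr), prot);
}
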
index f7aaadf9331fb75587e74a42b041d78b2a014fc0..8e4ea143ed96403d275bf6727801961db9a053d7 100644 (file)
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS                8
 #define PEBS_COUNTER_MASK      ((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
        PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
        PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-       u64     bts_buffer_base;
-       u64     bts_index;
-       u64     bts_absolute_maximum;
-       u64     bts_interrupt_threshold;
-       u64     pebs_buffer_base;
-       u64     pebs_index;
-       u64     pebs_absolute_maximum;
-       u64     pebs_interrupt_threshold;
-       u64     pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
        (PERF_REG_X86_AX | \
         PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
         * Intel DebugStore bits
         */
        struct debug_store      *ds;
+       void                    *ds_pebs_vaddr;
+       void                    *ds_bts_vaddr;
        u64                     pebs_enabled;
        int                     n_pebs;
        int                     n_large_pebs;
index dbfd0854651fe4fa13154e3ce04e9baa1f643637..cf5961ca867746ae2eadb4a2b6f2c9068cec48c4 100644 (file)
@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
-       ".popsection"
+       ".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
        OLDINSTR_2(oldinstr, 1, 2)                                      \
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
-       ".popsection"
+       ".popsection\n"
 
 /*
  * Alternative instructions for different CPU types or capabilities.
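
The two one-character hunks above matter because ALTERNATIVE() expands to a string that callers routinely paste together with more asm text. A hypothetical illustration (demo_concat() and the mfence/X86_FEATURE_XMM2 pairing are merely examples, not from the patch): without the trailing "\n", the next statement would be glued onto the directive, producing ".popsectionnop", which the assembler rejects.

static inline void demo_concat(void)
{
        asm volatile (ALTERNATIVE("", "mfence", X86_FEATURE_XMM2)
                      "nop"     /* concatenated text must start a fresh line */
                      ::: "memory");
}
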
index ff700d81e91efcf89a2f2ac725b3e80d868c8621..0927cdc4f9460165a9159b1c4f7bea0e759bdbb9 100644 (file)
 #include <asm/pgtable.h>
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
+#include <asm/asm.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
 extern void cmpxchg8b_emu(void);
 #endif
+
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_32
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void);
+#else
+#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void);
+INDIRECT_THUNK(8)
+INDIRECT_THUNK(9)
+INDIRECT_THUNK(10)
+INDIRECT_THUNK(11)
+INDIRECT_THUNK(12)
+INDIRECT_THUNK(13)
+INDIRECT_THUNK(14)
+INDIRECT_THUNK(15)
+#endif
+INDIRECT_THUNK(ax)
+INDIRECT_THUNK(bx)
+INDIRECT_THUNK(cx)
+INDIRECT_THUNK(dx)
+INDIRECT_THUNK(si)
+INDIRECT_THUNK(di)
+INDIRECT_THUNK(bp)
+INDIRECT_THUNK(sp)
+#endif /* CONFIG_RETPOLINE */
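
The thunk bodies themselves live in arch/x86/lib/retpoline.S and are not part of this excerpt. As a hedged sketch, each one implements the retpoline sequence, shown here under a demo_ name rather than the real symbol:

/* Hypothetical standalone rendition of a 64-bit thunk for %rax: */
asm (
"       .globl demo_indirect_thunk_rax\n"
"demo_indirect_thunk_rax:\n"
"       call    1f\n"
"2:     pause\n"                /* speculation trap: spins if the ret is predicted */
"       jmp     2b\n"
"1:     mov     %rax, (%rsp)\n" /* overwrite the return address with the target */
"       ret\n"                  /* architecturally jumps to *%rax, bypassing the BTB */
);
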
index 219faaec51dfa192f69d8893c8844219c0c89029..386a6900e206f6578e3b38ee7f085d36ac50a928 100644 (file)
 #endif
 
 #ifndef __ASSEMBLY__
+#ifndef __BPF__
 /*
  * This output constraint should be used for any inline asm which has a "call"
  * instruction.  Otherwise the asm may be inserted before the frame pointer
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif
+#endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644 (file)
index 0000000..4a7884b
--- /dev/null
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+#include <asm/intel_ds.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code.  Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+       char gdt[PAGE_SIZE];
+
+       /*
+        * The GDT is just below entry_stack and thus serves (on x86_64) as
+        * a read-only guard page.
+        */
+       struct entry_stack_page entry_stack_page;
+
+       /*
+        * On x86_64, the TSS is mapped RO.  On x86_32, it's mapped RW because
+        * we need task switches to work, and task switches write to the TSS.
+        */
+       struct tss_struct tss;
+
+       char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+       /*
+        * Exception stacks used for IST entries.
+        *
+        * In the future, this should have a separate slot for each stack
+        * with guard pages between them.
+        */
+       char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+#ifdef CONFIG_CPU_SUP_INTEL
+       /*
+        * Per CPU debug store for Intel performance monitoring. Wastes a
+        * full page at the moment.
+        */
+       struct debug_store cpu_debug_store;
+       /*
+        * The actual PEBS/BTS buffers must be mapped to user space.
+        * Reserve enough fixmap PTEs.
+        */
+       struct debug_store_buffers cpu_debug_buffers;
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE    (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_TOT_SIZE        (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+
+#define        CPU_ENTRY_AREA_RO_IDT           CPU_ENTRY_AREA_BASE
+#define CPU_ENTRY_AREA_PER_CPU         (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
+
+#define CPU_ENTRY_AREA_RO_IDT_VADDR    ((void *)CPU_ENTRY_AREA_RO_IDT)
+
+#define CPU_ENTRY_AREA_MAP_SIZE                        \
+       (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+
+extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
+
+static inline struct entry_stack *cpu_entry_stack(int cpu)
+{
+       return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
+}
+
+#endif
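
get_cpu_entry_area() itself is defined out of line; a sketch consistent with the layout macros above (assumed, not copied from the patch — the real helper also sanity-checks the cpu index) simply indexes the per-CPU region:

static inline struct cpu_entry_area *demo_cpu_entry_area(int cpu)
{
        unsigned long va = CPU_ENTRY_AREA_PER_CPU +
                           cpu * CPU_ENTRY_AREA_SIZE;

        return (struct cpu_entry_area *)va;
}
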
index bf6a76202a779ee131b4df8c89449ab52abd0a79..ea9a7dde62e5c4d551ba89e429f911fb5c6603fd 100644 (file)
@@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
        set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
+#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
+
 #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
 /*
  * Static testing of CPU features.  Used the same as boot_cpu_has().
index 800104c8a3edfee7f4f52a33b8451a51ee0ed90a..f275447862f4583909d14b6e2469c16e4a81b951 100644 (file)
 #define X86_FEATURE_CAT_L3             ( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE          ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD      ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_INTEL_PPIN         ( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT           ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW      ( 7*32+16) /* AVX-512 Neural Network Instructions */
 #define X86_BUG_SWAPGS_FENCE           X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR                        X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400               X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN           X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1             X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2             X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
index 4011cb03ef08e52db15f52779ce366c26359a34b..13c5ee878a477902b8494532b24853b6c156a170 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/mmu.h>
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
+#include <asm/cpu_entry_area.h>
 
 #include <linux/smp.h>
 #include <linux/percpu.h>
@@ -20,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
        desc->type              = (info->read_exec_only ^ 1) << 1;
        desc->type             |= info->contents << 2;
+       /* Set the ACCESS bit so it can be mapped RO */
+       desc->type             |= 1;
 
        desc->s                 = 1;
        desc->dpl               = 0x3;
@@ -60,17 +63,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
        return this_cpu_ptr(&gdt_page)->gdt;
 }
 
-/* Get the fixmap index for a specific processor */
-static inline unsigned int get_cpu_gdt_ro_index(int cpu)
-{
-       return FIX_GDT_REMAP_BEGIN + cpu;
-}
-
 /* Provide the fixmap address of the remapped GDT */
 static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
 {
-       unsigned int idx = get_cpu_gdt_ro_index(cpu);
-       return (struct desc_struct *)__fix_to_virt(idx);
+       return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
 }
 
 /* Provide the current read-only GDT */
@@ -185,7 +181,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
 #endif
 }
 
-static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
+static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
 {
        struct desc_struct *d = get_cpu_gdt_rw(cpu);
        tss_desc tss;
index 14d6d50073142b0f49b06850ccd0d394546479ee..b027633e73003e121d7c043438ac7dbd10fc07a4 100644 (file)
 # define DISABLE_LA57  (1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI           0
+#else
+# define DISABLE_PTI           (1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -60,7 +66,7 @@
 #define DISABLED_MASK4 (DISABLE_PCID)
 #define DISABLED_MASK5 0
 #define DISABLED_MASK6 0
-#define DISABLED_MASK7 0
+#define DISABLED_MASK7 (DISABLE_PTI)
 #define DISABLED_MASK8 0
 #define DISABLED_MASK9 (DISABLE_MPX)
 #define DISABLED_MASK10        0
index 0211029076ea8b9ed6648b9bf298c99c8b2124ad..6777480d8a427eaaa07559f77985c125aa66bb6c 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ESPFIX_H
 #define _ASM_X86_ESPFIX_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_ESPFIX64
 
 #include <asm/percpu.h>
 
@@ -11,7 +11,8 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr);
 
 extern void init_espfix_bsp(void);
 extern void init_espfix_ap(int cpu);
-
-#endif /* CONFIG_X86_64 */
+#else
+static inline void init_espfix_ap(int cpu) { }
+#endif
 
 #endif /* _ASM_X86_ESPFIX_H */
index b0c505fe9a958c701fef6d96f281bb8ab1a773de..64c4a30e0d39621ff8587fc8da538cd3d1d9f144 100644 (file)
@@ -44,7 +44,6 @@ extern unsigned long __FIXADDR_TOP;
                         PAGE_SIZE)
 #endif
 
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
@@ -84,7 +83,6 @@ enum fixed_addresses {
        FIX_IO_APIC_BASE_0,
        FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
 #endif
-       FIX_RO_IDT,     /* Virtual mapping for read-only IDT */
 #ifdef CONFIG_X86_32
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -100,9 +98,6 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_INTEL_MID
        FIX_LNW_VRTC,
 #endif
-       /* Fixmap entries to remap the GDTs, one per processor. */
-       FIX_GDT_REMAP_BEGIN,
-       FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
 
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
@@ -143,7 +138,7 @@ enum fixed_addresses {
 extern void reserve_top_address(unsigned long reserve);
 
 #define FIXADDR_SIZE   (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
-#define FIXADDR_START          (FIXADDR_TOP - FIXADDR_SIZE)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
 
 extern int fixmaps_set;
 
index 1b0a5abcd8aeb6e700013c5434aaeb0bba7a152f..96aa6b9884dc5b3bc8d54c9ef1c6258eea13a0d0 100644 (file)
 #ifndef _ASM_X86_HYPERVISOR_H
 #define _ASM_X86_HYPERVISOR_H
 
-#ifdef CONFIG_HYPERVISOR_GUEST
-
-#include <asm/kvm_para.h>
-#include <asm/x86_init.h>
-#include <asm/xen/hypervisor.h>
-
-/*
- * x86 hypervisor information
- */
-
+/* x86 hypervisor types  */
 enum x86_hypervisor_type {
        X86_HYPER_NATIVE = 0,
        X86_HYPER_VMWARE,
@@ -39,6 +30,12 @@ enum x86_hypervisor_type {
        X86_HYPER_KVM,
 };
 
+#ifdef CONFIG_HYPERVISOR_GUEST
+
+#include <asm/kvm_para.h>
+#include <asm/x86_init.h>
+#include <asm/xen/hypervisor.h>
+
 struct hypervisor_x86 {
        /* Hypervisor name */
        const char      *name;
@@ -58,7 +55,15 @@ struct hypervisor_x86 {
 
 extern enum x86_hypervisor_type x86_hyper_type;
 extern void init_hypervisor_platform(void);
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return x86_hyper_type == type;
+}
 #else
 static inline void init_hypervisor_platform(void) { }
+static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
+{
+       return type == X86_HYPER_NATIVE;
+}
 #endif /* CONFIG_HYPERVISOR_GUEST */
 #endif /* _ASM_X86_HYPERVISOR_H */
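
A usage sketch for the new helper (hypothetical caller, kernel context): the !HYPERVISOR_GUEST stub lets the same test compile everywhere while reporting only X86_HYPER_NATIVE.

static void demo_report_hypervisor(void)
{
        if (hypervisor_is_type(X86_HYPER_KVM))
                pr_info("running as a KVM guest\n");
        else if (hypervisor_is_type(X86_HYPER_NATIVE))
                pr_info("running on bare metal\n");
}
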
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
new file mode 100644 (file)
index 0000000..62a9f49
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE                (PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE       (PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS                8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+       u64     bts_buffer_base;
+       u64     bts_index;
+       u64     bts_absolute_maximum;
+       u64     bts_interrupt_threshold;
+       u64     pebs_buffer_base;
+       u64     pebs_index;
+       u64     pebs_absolute_maximum;
+       u64     pebs_interrupt_threshold;
+       u64     pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+       char    bts_buffer[BTS_BUFFER_SIZE];
+       char    pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
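
Some arithmetic implied by the definitions above, written as hedged compile-time checks (the demo function is illustrative only): each buffer spans 16 pages, and __aligned(PAGE_SIZE) pads the 128 bytes of actual fields out to a full page.

static void demo_ds_layout_checks(void)
{
        BUILD_BUG_ON(BTS_BUFFER_SIZE != 16 * PAGE_SIZE);
        BUILD_BUG_ON(PEBS_BUFFER_SIZE != 16 * PAGE_SIZE);
        /* 16 u64 fields = 128 bytes, padded to PAGE_SIZE by __aligned() */
        BUILD_BUG_ON(sizeof(struct debug_store) != PAGE_SIZE);
}
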
diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h
new file mode 100644 (file)
index 0000000..989cfa8
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INVPCID
+#define _ASM_X86_INVPCID
+
+static inline void __invpcid(unsigned long pcid, unsigned long addr,
+                            unsigned long type)
+{
+       struct { u64 d[2]; } desc = { { pcid, addr } };
+
+       /*
+        * The memory clobber is because the whole point is to invalidate
+        * stale TLB entries and, especially if we're flushing global
+        * mappings, we don't want the compiler to reorder any subsequent
+        * memory accesses before the TLB flush.
+        *
+        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
+        * invpcid (%rcx), %rax in long mode.
+        */
+       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+}
+
+#define INVPCID_TYPE_INDIV_ADDR                0
+#define INVPCID_TYPE_SINGLE_CTXT       1
+#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
+#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+
+/* Flush all mappings for a given pcid and addr, not including globals. */
+static inline void invpcid_flush_one(unsigned long pcid,
+                                    unsigned long addr)
+{
+       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invpcid_flush_single_context(unsigned long pcid)
+{
+       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+}
+
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invpcid_flush_all(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invpcid_flush_all_nonglobals(void)
+{
+       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+}
+
+#endif /* _ASM_X86_INVPCID */
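
A hedged usage sketch (the demo function and the PTI-flavored scenario are assumptions, not from this patch): invalidating one user translation by (PCID, address) pair without disturbing global kernel mappings.

static void demo_flush_one_user_page(unsigned long pcid, unsigned long uaddr)
{
        /* INVPCID type 0 drops just this (PCID, VA) translation */
        if (static_cpu_has(X86_FEATURE_INVPCID))
                invpcid_flush_one(pcid, uaddr);
}
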
index 139feef467f7e298c6f9db57c43facc64f5468b6..c066ffae222b769996b78c13ee5d063dc7c5c544 100644 (file)
@@ -44,7 +44,7 @@ extern int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
 extern void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
                              unsigned int nr_irqs);
 extern int mp_irqdomain_activate(struct irq_domain *domain,
-                                struct irq_data *irq_data, bool early);
+                                struct irq_data *irq_data, bool reserve);
 extern void mp_irqdomain_deactivate(struct irq_domain *domain,
                                    struct irq_data *irq_data);
 extern int mp_irqdomain_ioapic_idx(struct irq_domain *domain);
index c8ef23f2c28f17c59308b9c41179c47f85e075ad..89f08955fff733c688a5ce4f4a0b8d74050ee617 100644 (file)
@@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
        swapgs;                                 \
        sysretl
 
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(x)          pushfq; popq %rax
+#endif
 #else
 #define INTERRUPT_RETURN               iret
 #define ENABLE_INTERRUPTS_SYSEXIT      sti; sysexit
index f86a8caa561e8873c3f34e6e8b8cd509ebadd819..395c9631e000a3a17aa574c1b25fcc2cafd5b5fb 100644 (file)
@@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
+extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
 
index 9ea26f16749706fddd5b15e8bf557a9e6156e165..5ff3e8af2c2056b7fe19560ee2ba1ad7146aaf2a 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_MMU_H
 
 #include <linux/spinlock.h>
+#include <linux/rwsem.h>
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 
@@ -27,7 +28,8 @@ typedef struct {
        atomic64_t tlb_gen;
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
-       struct ldt_struct *ldt;
+       struct rw_semaphore     ldt_usr_sem;
+       struct ldt_struct       *ldt;
 #endif
 
 #ifdef CONFIG_X86_64
index 6d16d15d09a0daed96a1e3d670b6203d1779b98e..c931b88982a0ff59e3b67947cc606e452f327dc0 100644 (file)
@@ -50,22 +50,53 @@ struct ldt_struct {
         * call gates.  On native, we could merge the ldt_struct and LDT
         * allocations, but it's not worth trying to optimize.
         */
-       struct desc_struct *entries;
-       unsigned int nr_entries;
+       struct desc_struct      *entries;
+       unsigned int            nr_entries;
+
+       /*
+        * If PTI is in use, then the entries array is not mapped while we're
+        * in user mode.  The whole array will be aliased at the address
+        * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+        * and map, and enable a new LDT without invalidating the mapping
+        * of an older, still-in-use LDT.
+        *
+        * slot will be -1 if this LDT doesn't have an alias mapping.
+        */
+       int                     slot;
 };
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+       return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+       BUG();
+#endif
+}
+
 /*
  * Used for LDT copy/destruction.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
+static inline void init_new_context_ldt(struct mm_struct *mm)
+{
+       mm->context.ldt = NULL;
+       init_rwsem(&mm->context.ldt_usr_sem);
+}
+int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else  /* CONFIG_MODIFY_LDT_SYSCALL */
-static inline int init_new_context_ldt(struct task_struct *tsk,
-                                      struct mm_struct *mm)
+static inline void init_new_context_ldt(struct mm_struct *mm) { }
+static inline int ldt_dup_context(struct mm_struct *oldmm,
+                                 struct mm_struct *mm)
 {
        return 0;
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -90,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
         * that we can see.
         */
 
-       if (unlikely(ldt))
-               set_ldt(ldt->entries, ldt->nr_entries);
-       else
+       if (unlikely(ldt)) {
+               if (static_cpu_has(X86_FEATURE_PTI)) {
+                       if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+                               /*
+                                * Whoops -- either the new LDT isn't mapped
+                                * (if slot == -1) or is mapped into a bogus
+                                * slot (if slot > 1).
+                                */
+                               clear_LDT();
+                               return;
+                       }
+
+                       /*
+                        * If page table isolation is enabled, ldt->entries
+                        * will not be mapped in the userspace pagetables.
+                        * Tell the CPU to access the LDT through the alias
+                        * at ldt_slot_va(ldt->slot).
+                        */
+                       set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+               } else {
+                       set_ldt(ldt->entries, ldt->nr_entries);
+               }
+       } else {
                clear_LDT();
+       }
 #else
        clear_LDT();
 #endif
@@ -132,18 +184,21 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 static inline int init_new_context(struct task_struct *tsk,
                                   struct mm_struct *mm)
 {
+       mutex_init(&mm->context.lock);
+
        mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
        atomic64_set(&mm->context.tlb_gen, 0);
 
-       #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
        if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
                /* pkey 0 is the default and always allocated */
                mm->context.pkey_allocation_map = 0x1;
                /* -1 means unallocated or invalid */
                mm->context.execute_only_pkey = -1;
        }
-       #endif
-       return init_new_context_ldt(tsk, mm);
+#endif
+       init_new_context_ldt(mm);
+       return 0;
 }
 static inline void destroy_context(struct mm_struct *mm)
 {
@@ -176,15 +231,16 @@ do {                                              \
 } while (0)
 #endif
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        paravirt_arch_dup_mmap(oldmm, mm);
+       return ldt_dup_context(oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
        paravirt_arch_exit_mmap(mm);
+       ldt_arch_exit_mmap(mm);
 }
 
 #ifdef CONFIG_X86_64
@@ -281,33 +337,6 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
        return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
-/*
- * If PCID is on, ASID-aware code paths put the ASID+1 into the PCID
- * bits.  This serves two purposes.  It prevents a nasty situation in
- * which PCID-unaware code saves CR3, loads some other value (with PCID
- * == 0), and then restores CR3, thus corrupting the TLB for ASID 0 if
- * the saved ASID was nonzero.  It also means that any bugs involving
- * loading a PCID-enabled CR3 with CR4.PCIDE off will trigger
- * deterministically.
- */
-
-static inline unsigned long build_cr3(struct mm_struct *mm, u16 asid)
-{
-       if (static_cpu_has(X86_FEATURE_PCID)) {
-               VM_WARN_ON_ONCE(asid > 4094);
-               return __sme_pa(mm->pgd) | (asid + 1);
-       } else {
-               VM_WARN_ON_ONCE(asid != 0);
-               return __sme_pa(mm->pgd);
-       }
-}
-
-static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
-{
-       VM_WARN_ON_ONCE(asid > 4094);
-       return __sme_pa(mm->pgd) | (asid + 1) | CR3_NOFLUSH;
-}
-
 /*
  * This can be used from process context to figure out what the value of
  * CR3 is without needing to do a (slow) __read_cr3().
@@ -317,7 +346,7 @@ static inline unsigned long build_cr3_noflush(struct mm_struct *mm, u16 asid)
  */
 static inline unsigned long __get_current_cr3_fast(void)
 {
-       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm),
+       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
                this_cpu_read(cpu_tlbstate.loaded_mm_asid));
 
        /* For now, be very restrictive about when this can be called. */
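
The two-slot scheme in the ldt_struct comment above implies a simple invariant when installing a new LDT: map it into the slot the old one is not using, so the old alias stays live until the CPU has switched away. A hedged sketch (the helper name is hypothetical; the real logic lives in ldt.c):

static int demo_pick_ldt_slot(const struct ldt_struct *old_ldt)
{
        /* Alternate between slots 0 and 1 across successive installs */
        return old_ldt ? !old_ldt->slot : 0;
}
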
index 5400add2885b9646fa7d8d53c5b35e16c2794c7d..8bf450b13d9f53883db9ee89ac6d39913963907e 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/nmi.h>
 #include <asm/io.h>
 #include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
 
 /*
  * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
                return U64_MAX;
 
        __asm__ __volatile__("mov %4, %%r8\n"
-                            "call *%5"
+                            CALL_NOSPEC
                             : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                               "+c" (control), "+d" (input_address)
-                            :  "r" (output_address), "m" (hv_hypercall_pg)
+                            :  "r" (output_address),
+                               THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory", "r8", "r9", "r10", "r11");
 #else
        u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
        if (!hv_hypercall_pg)
                return U64_MAX;
 
-       __asm__ __volatile__("call *%7"
+       __asm__ __volatile__(CALL_NOSPEC
                             : "=A" (hv_status),
                               "+c" (input_address_lo), ASM_CALL_CONSTRAINT
                             : "A" (control),
                               "b" (input_address_hi),
                               "D"(output_address_hi), "S"(output_address_lo),
-                              "m" (hv_hypercall_pg)
+                              THUNK_TARGET(hv_hypercall_pg)
                             : "cc", "memory");
 #endif /* !x86_64 */
        return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
 
 #ifdef CONFIG_X86_64
        {
-               __asm__ __volatile__("call *%4"
+               __asm__ __volatile__(CALL_NOSPEC
                                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
                                       "+c" (control), "+d" (input1)
-                                    : "m" (hv_hypercall_pg)
+                                    : THUNK_TARGET(hv_hypercall_pg)
                                     : "cc", "r8", "r9", "r10", "r11");
        }
 #else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
                u32 input1_hi = upper_32_bits(input1);
                u32 input1_lo = lower_32_bits(input1);
 
-               __asm__ __volatile__ ("call *%5"
+               __asm__ __volatile__ (CALL_NOSPEC
                                      : "=A"(hv_status),
                                        "+c"(input1_lo),
                                        ASM_CALL_CONSTRAINT
                                      : "A" (control),
                                        "b" (input1_hi),
-                                       "m" (hv_hypercall_pg)
+                                       THUNK_TARGET(hv_hypercall_pg)
                                      : "cc", "edi", "esi");
        }
 #endif
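
The pattern these hunks apply generalizes to any inline-asm indirect call. A hedged 64-bit sketch (the demo function is hypothetical; a real caller must also know its target's clobbers, here assumed to be the C calling convention's caller-saved registers):

static u64 demo_call_nospec(u64 (*fn)(void))
{
        u64 ret;

        asm volatile (CALL_NOSPEC
                      : "=a" (ret), ASM_CALL_CONSTRAINT
                      : THUNK_TARGET(fn)
                      : "rcx", "rdx", "rsi", "rdi",
                        "r8", "r9", "r10", "r11", "cc", "memory");
        return ret;
}
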
index 34c4922bbc3fb5ed95cb0819d7ac6b9cd37a7f41..e7b983a355060a6c5b5db76f1fc18475eb647ae5 100644 (file)
 #define FAM10H_MMIO_CONF_BASE_MASK     0xfffffffULL
 #define FAM10H_MMIO_CONF_BASE_SHIFT    20
 #define MSR_FAM10H_NODE_ID             0xc001100c
+#define MSR_F10H_DECFG                 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT    1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE                BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
 
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1                        0xc001001a
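
A hedged sketch of how the new DECFG definitions are meant to be used (the actual AMD init code is outside this excerpt and may differ): making LFENCE dispatch-serializing so it can stand in as a cheap speculation barrier.

static void demo_enable_lfence_serialize(void)
{
        msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
}
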
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644 (file)
index 0000000..402a11c
--- /dev/null
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability: sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS         16      /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version â€” two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
+       mov     $(nr/2), reg;                   \
+771:                                           \
+       call    772f;                           \
+773:   /* speculation trap */                  \
+       pause;                                  \
+       jmp     773b;                           \
+772:                                           \
+       call    774f;                           \
+775:   /* speculation trap */                  \
+       pause;                                  \
+       jmp     775b;                           \
+774:                                           \
+       dec     reg;                            \
+       jnz     771b;                           \
+       add     $(BITS_PER_LONG/8) * nr, sp;
+
+#ifdef __ASSEMBLY__
+
+/*
+ * This should be used immediately before a retpoline alternative.  It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+.macro ANNOTATE_NOSPEC_ALTERNATIVE
+       .Lannotate_\@:
+       .pushsection .discard.nospec
+       .long .Lannotate_\@ - .
+       .popsection
+.endm
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+       call    .Ldo_rop_\@
+.Lspec_trap_\@:
+       pause
+       jmp     .Lspec_trap_\@
+.Ldo_rop_\@:
+       mov     \reg, (%_ASM_SP)
+       ret
+.endm
+
+/*
+ * This is a wrapper around RETPOLINE_JMP so the called function in reg
+ * returns to the instruction after the macro.
+ */
+.macro RETPOLINE_CALL reg:req
+       jmp     .Ldo_call_\@
+.Ldo_retpoline_jmp_\@:
+       RETPOLINE_JMP \reg
+.Ldo_call_\@:
+       call    .Ldo_retpoline_jmp_\@
+.endm
+
+/*
+ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro JMP_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(jmp *\reg),                           \
+               __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+               __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       jmp     *\reg
+#endif
+.endm
+
+.macro CALL_NOSPEC reg:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE_2 __stringify(call *\reg),                          \
+               __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+               __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+       call    *\reg
+#endif
+.endm
+
+/*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+       ANNOTATE_NOSPEC_ALTERNATIVE
+       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
+               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
+               \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define ANNOTATE_NOSPEC_ALTERNATIVE                            \
+       "999:\n\t"                                              \
+       ".pushsection .discard.nospec\n\t"                      \
+       ".long 999b - .\n\t"                                    \
+       ".popsection\n\t"
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE, not CONFIG_RETPOLINE.
+ */
+# define CALL_NOSPEC                                           \
+       ANNOTATE_NOSPEC_ALTERNATIVE                             \
+       ALTERNATIVE(                                            \
+       "call *%[thunk_target]\n",                              \
+       "call __x86_indirect_thunk_%V[thunk_target]\n",         \
+       X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n",    \
+       "       jmp    904f;\n"                                 \
+       "       .align 16\n"                                    \
+       "901:   call   903f;\n"                                 \
+       "902:   pause;\n"                                       \
+       "       jmp    902b;\n"                                 \
+       "       .align 16\n"                                    \
+       "903:   addl   $4, %%esp;\n"                            \
+       "       pushl  %[thunk_target];\n"                      \
+       "       ret;\n"                                         \
+       "       .align 16\n"                                    \
+       "904:   call   901b;\n",                                \
+       X86_FEATURE_RETPOLINE)
+
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline for C / inline asm */
+# define CALL_NOSPEC "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+/* The Spectre V2 mitigation variants */
+enum spectre_v2_mitigation {
+       SPECTRE_V2_NONE,
+       SPECTRE_V2_RETPOLINE_MINIMAL,
+       SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
+       SPECTRE_V2_RETPOLINE_GENERIC,
+       SPECTRE_V2_RETPOLINE_AMD,
+       SPECTRE_V2_IBRS,
+};
+
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+       unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+       asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+                     ALTERNATIVE("jmp 910f",
+                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+                                 X86_FEATURE_RETPOLINE)
+                     "910:"
+                     : "=&r" (loops), ASM_CALL_CONSTRAINT
+                     : "r" (loops) : "memory" );
+#endif
+}
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
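
The enum above is what the boot-time mitigation chooser returns. Its real decision logic (including the spectre_v2= command line) lives in bugs.c, outside this excerpt; a minimal sketch of the shape, under those assumptions:

static enum spectre_v2_mitigation demo_select_mitigation(void)
{
        if (!IS_ENABLED(CONFIG_RETPOLINE))
                return SPECTRE_V2_NONE;

        /* AMD parts can rely on a serializing lfence ahead of the branch */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return SPECTRE_V2_RETPOLINE_AMD;

        return SPECTRE_V2_RETPOLINE_GENERIC;
}
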
index 283efcaac8aff86f2c004bc23e4b8642cbf3d527..892df375b6155a51f584760efb9f9e77c3f732e8 100644 (file)
@@ -927,6 +927,15 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+
+#ifdef CONFIG_DEBUG_ENTRY
+#define SAVE_FLAGS(clobbers)                                        \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
+                 PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE);        \
+                 call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl);    \
+                 PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
+#endif
+
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index 7a5d6695abd37e737bfb0e532ec4d86c53256433..eb66fa9cd0fc6187d55dc0c1a8fb30e5e32687a8 100644 (file)
@@ -38,6 +38,7 @@ do {                                          \
 #define PCI_NOASSIGN_ROMS      0x80000
 #define PCI_ROOT_NO_CRS                0x100000
 #define PCI_NOASSIGN_BARS      0x200000
+#define PCI_BIG_ROOT_WINDOW    0x400000
 
 extern unsigned int pci_probe;
 extern unsigned long pirq_table_addr;
index 4b5e1eafada731cdedd48d771d79d55e766a84eb..aff42e1da6ee1591bbec15ce6f0543953789d19c 100644 (file)
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
 extern gfp_t __userpte_alloc_gfp;
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 /*
  * Allocate and free page tables.
  */
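
With the order defined above, a hedged sketch of the allocation it implies (the real pgd_alloc() also handles PAE preallocation and list bookkeeping): one 8k-aligned order-1 block whose two 4k halves are the kernel and user PGDs.

static pgd_t *demo_pgd_alloc(void)
{
        return (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                         PGD_ALLOCATION_ORDER);
}
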
index 95e2dfd755218ccfaf6417b44c822b545a35568e..e42b8943cb1a311a00ddceb36129ede3012489ef 100644 (file)
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 
 #ifdef CONFIG_DEBUG_WX
@@ -841,7 +842,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 static inline int p4d_bad(p4d_t p4d)
 {
-       return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+       unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+       if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               ignore_flags |= _PAGE_NX;
+
+       return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
@@ -875,7 +881,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-       return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+       unsigned long ignore_flags = _PAGE_USER;
+
+       if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               ignore_flags |= _PAGE_NX;
+
+       return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -904,7 +915,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -1106,7 +1121,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-       memcpy(dst, src, count * sizeof(pgd_t));
+       memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+       /* Clone the user space pgd as well */
+       memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+              count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
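
A hedged sketch of why pgd_offset_pgd() earns its keep (the demo function is hypothetical; kernel_to_user_pgdp() is the PTI helper introduced elsewhere in this series): the same index arithmetic, applied to the user half of the tables.

static pgd_t *demo_user_pgd_entry(struct mm_struct *mm, unsigned long addr)
{
        /* Requires CONFIG_PAGE_TABLE_ISOLATION for kernel_to_user_pgdp() */
        return pgd_offset_pgd(kernel_to_user_pgdp(mm->pgd), addr);
}
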
index f2ca9b28fd68303f4494775564aa9da77ddcd53a..ce245b0cdfcaa42bd932a387bbb189ee7349bfef 100644 (file)
@@ -38,13 +38,22 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
 #define LAST_PKMAP 1024
 #endif
 
-#define PKMAP_BASE ((FIXADDR_START - PAGE_SIZE * (LAST_PKMAP + 1))     \
-                   & PMD_MASK)
+/*
+ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
+ * to avoid include recursion hell
+ */
+#define CPU_ENTRY_AREA_PAGES   (NR_CPUS * 40)
+
+#define CPU_ENTRY_AREA_BASE                            \
+       ((FIXADDR_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) & PMD_MASK)
+
+#define PKMAP_BASE             \
+       ((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
 
 #ifdef CONFIG_HIGHMEM
 # define VMALLOC_END   (PKMAP_BASE - 2 * PAGE_SIZE)
 #else
-# define VMALLOC_END   (FIXADDR_START - 2 * PAGE_SIZE)
+# define VMALLOC_END   (CPU_ENTRY_AREA_BASE - 2 * PAGE_SIZE)
 #endif
 
 #define MODULES_VADDR  VMALLOC_START
index e9f05331e732a057b341e78977d666b8cfe35289..81462e9a34f6af49645a08f55c7d67e0144dbb77 100644 (file)
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr |= BIT(bit);
+       return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+       unsigned long __ptr = (unsigned long)ptr;
+
+       __ptr &= ~BIT(bit);
+       return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+       return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+       return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+       return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+       return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+       unsigned long ptr = (unsigned long)__ptr;
+
+       return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return pgd;
+       return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+       p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
        *p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       *pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
        *pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
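
A hedged illustration of the bit-12 flip described above (the demo function is hypothetical): because the kernel PGD is 8k-aligned, its user twin sits exactly one page higher.

static void demo_pgd_halves(struct mm_struct *mm)
{
        pgd_t *k = mm->pgd;
        pgd_t *u = kernel_to_user_pgdp(k);      /* == (void *)k + PAGE_SIZE */

        WARN_ON(user_to_kernel_pgdp(u) != k);
}
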
index 6d5f45dcd4a13caafbf184f323d0725c2d5f53e4..6b8f73dcbc2c2b6a835e17ac0828d8b6484f4dce 100644 (file)
@@ -75,33 +75,52 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE     (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE - 1))
 
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#define MAXMEM         _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
+#define MAXMEM                 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+
 #ifdef CONFIG_X86_5LEVEL
-#define VMALLOC_SIZE_TB _AC(16384, UL)
-#define __VMALLOC_BASE _AC(0xff92000000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(12800, UL)
+# define __VMALLOC_BASE                _AC(0xffa0000000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY         _AC(-112, UL)
+# define LDT_BASE_ADDR         (LDT_PGD_ENTRY << PGDIR_SHIFT)
 #else
-#define VMALLOC_SIZE_TB        _AC(32, UL)
-#define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-#define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
+# define VMALLOC_SIZE_TB       _AC(32, UL)
+# define __VMALLOC_BASE                _AC(0xffffc90000000000, UL)
+# define __VMEMMAP_BASE                _AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY         _AC(-3, UL)
+# define LDT_BASE_ADDR         (LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
+
 #ifdef CONFIG_RANDOMIZE_MEMORY
-#define VMALLOC_START  vmalloc_base
-#define VMEMMAP_START  vmemmap_base
+# define VMALLOC_START         vmalloc_base
+# define VMEMMAP_START         vmemmap_base
 #else
-#define VMALLOC_START  __VMALLOC_BASE
-#define VMEMMAP_START  __VMEMMAP_BASE
+# define VMALLOC_START         __VMALLOC_BASE
+# define VMEMMAP_START         __VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
-#define VMALLOC_END    (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+
+#define VMALLOC_END            (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+
+#define MODULES_VADDR          (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
 /* The module sections ends with the start of the fixmap */
-#define MODULES_END   __fix_to_virt(__end_of_fixed_addresses + 1)
-#define MODULES_LEN   (MODULES_END - MODULES_VADDR)
-#define ESPFIX_PGD_ENTRY _AC(-2, UL)
-#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << P4D_SHIFT)
-#define EFI_VA_START    ( -4 * (_AC(1, UL) << 30))
-#define EFI_VA_END      (-68 * (_AC(1, UL) << 30))
+#define MODULES_END            _AC(0xffffffffff000000, UL)
+#define MODULES_LEN            (MODULES_END - MODULES_VADDR)
+
+#define ESPFIX_PGD_ENTRY       _AC(-2, UL)
+#define ESPFIX_BASE_ADDR       (ESPFIX_PGD_ENTRY << P4D_SHIFT)
+
+#define CPU_ENTRY_AREA_PGD     _AC(-4, UL)
+#define CPU_ENTRY_AREA_BASE    (CPU_ENTRY_AREA_PGD << P4D_SHIFT)
+
+#define EFI_VA_START           ( -4 * (_AC(1, UL) << 30))
+#define EFI_VA_END             (-68 * (_AC(1, UL) << 30))
 
 #define EARLY_DYNAMIC_PAGE_TABLES      64
 
index 43212a43ee69feea1de27275d9075c566cdfcd2c..625a52a5594f53b3ddc889843b92d228cc39a8e8 100644 (file)
 #define CR3_ADDR_MASK  __sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK  0xFFFull
 #define CR3_NOFLUSH    BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_PCID_USER_BIT     11
+#endif
+
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
index cc16fa882e3e760a40351cf3e7476ac9f25ffe00..d3a67fba200ae2a5c03f52a7815dca00b43c63ad 100644 (file)
@@ -163,9 +163,9 @@ enum cpuid_regs_idx {
 extern struct cpuinfo_x86      boot_cpu_data;
 extern struct cpuinfo_x86      new_cpu_data;
 
-extern struct tss_struct       doublefault_tss;
-extern __u32                   cpu_caps_cleared[NCAPINTS];
-extern __u32                   cpu_caps_set[NCAPINTS];
+extern struct x86_hw_tss       doublefault_tss;
+extern __u32                   cpu_caps_cleared[NCAPINTS + NBUGINTS];
+extern __u32                   cpu_caps_set[NCAPINTS + NBUGINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
@@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
        write_cr3(__sme_pa(pgdir));
 }
 
+/*
+ * Note that while the legacy 'TSS' name comes from 'Task State Segment',
+ * on modern x86 CPUs the TSS also holds information important to 64-bit mode,
+ * unrelated to the task-switch mechanism:
+ */
 #ifdef CONFIG_X86_32
 /* This is the TSS defined by the hardware. */
 struct x86_hw_tss {
@@ -305,7 +310,13 @@ struct x86_hw_tss {
 struct x86_hw_tss {
        u32                     reserved1;
        u64                     sp0;
+
+       /*
+        * We store cpu_current_top_of_stack in sp1 so it's always accessible.
+        * Linux does not use ring 1, so sp1 is not otherwise needed.
+        */
        u64                     sp1;
+
        u64                     sp2;
        u64                     reserved2;
        u64                     ist[7];
@@ -323,12 +334,22 @@ struct x86_hw_tss {
 #define IO_BITMAP_BITS                 65536
 #define IO_BITMAP_BYTES                        (IO_BITMAP_BITS/8)
 #define IO_BITMAP_LONGS                        (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET               offsetof(struct tss_struct, io_bitmap)
+#define IO_BITMAP_OFFSET               (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
 #define INVALID_IO_BITMAP_OFFSET       0x8000
 
+struct entry_stack {
+       unsigned long           words[64];
+};
+
+struct entry_stack_page {
+       struct entry_stack stack;
+} __aligned(PAGE_SIZE);
+
 struct tss_struct {
        /*
-        * The hardware state:
+        * The fixed hardware portion.  This must not cross a page boundary
+        * at risk of violating the SDM's advice and potentially triggering
+        * errata.
         */
        struct x86_hw_tss       x86_tss;
 
@@ -339,18 +360,9 @@ struct tss_struct {
         * be within the limit.
         */
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
+} __aligned(PAGE_SIZE);
 
-#ifdef CONFIG_X86_32
-       /*
-        * Space for the temporary SYSENTER stack.
-        */
-       unsigned long           SYSENTER_stack_canary;
-       unsigned long           SYSENTER_stack[64];
-#endif
-
-} ____cacheline_aligned;
-
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
 
 /*
  * sizeof(unsigned long) coming from an extra "long" at the end
@@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#else
+/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
+#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
 #endif
 
 /*
@@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
 static inline void
 native_load_sp0(unsigned long sp0)
 {
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -535,12 +550,12 @@ static inline void native_swapgs(void)
 
 static inline unsigned long current_top_of_stack(void)
 {
-#ifdef CONFIG_X86_64
-       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
-#else
-       /* sp0 on x86_32 is special in and around vm86 mode. */
+       /*
+        *  We can't read directly from tss.sp0: sp0 on x86_32 is special in
+        *  and around vm86 mode and sp0 on x86_64 is special because of the
+        *  entry trampoline.
+        */
        return this_cpu_read_stable(cpu_current_top_of_stack);
-#endif
 }
 
 static inline bool on_thread_stack(void)
@@ -837,13 +852,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
 #define TASK_SIZE_MAX  ((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
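
Plugging in the usual 4-level paging value __VIRTUAL_MASK_SHIFT == 47 (an assumption here; 5-level paging uses 56), the define lands one page below the canonical boundary:

#include <stdio.h>

#define VIRTUAL_MASK_SHIFT 47           /* assumed: 4-level paging */
#define PAGE_SIZE          4096UL

int main(void)
{
        unsigned long task_size_max =
                (1UL << VIRTUAL_MASK_SHIFT) - PAGE_SIZE;

        printf("%#lx\n", task_size_max);        /* 0x7ffffffff000 */
        return 0;
}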
 
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
new file mode 100644 (file)
index 0000000..0b5ef05
--- /dev/null
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
index 8da111b3c342bbb61a9e630e101c8a83422a15ea..f737068787729f045a578776845231b0a0ee3e0d 100644 (file)
@@ -16,6 +16,7 @@ enum stack_type {
        STACK_TYPE_TASK,
        STACK_TYPE_IRQ,
        STACK_TYPE_SOFTIRQ,
+       STACK_TYPE_ENTRY,
        STACK_TYPE_EXCEPTION,
        STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
 };
@@ -28,6 +29,8 @@ struct stack_info {
 bool in_task_stack(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info);
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info);
+
 int get_stack_info(unsigned long *stack, struct task_struct *task,
                   struct stack_info *info, unsigned long *visit_mask);
 
index 8c6bd6863db9d6b737cd0649324c154f9b9798a3..eb5f7999a8934c7af8b6c5b170b05e5772264783 100644 (file)
@@ -16,8 +16,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss);
 
 /* This runs on the previous thread's stack. */
-static inline void prepare_switch_to(struct task_struct *prev,
-                                    struct task_struct *next)
+static inline void prepare_switch_to(struct task_struct *next)
 {
 #ifdef CONFIG_VMAP_STACK
        /*
@@ -70,7 +69,7 @@ struct fork_frame {
 
 #define switch_to(prev, next, last)                                    \
 do {                                                                   \
-       prepare_switch_to(prev, next);                                  \
+       prepare_switch_to(next);                                        \
                                                                        \
        ((last) = __switch_to_asm((prev), (next)));                     \
 } while (0)
@@ -79,10 +78,10 @@ do {                                                                        \
 static inline void refresh_sysenter_cs(struct thread_struct *thread)
 {
        /* Only happens when SEP is enabled, no need to test "SEP"arately: */
-       if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+       if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
                return;
 
-       this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+       this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
        wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
 }
 #endif
@@ -90,10 +89,12 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
 /* This is used when switching tasks or entering/exiting vm86 mode. */
 static inline void update_sp0(struct task_struct *task)
 {
+       /* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
 #ifdef CONFIG_X86_32
        load_sp0(task->thread.sp0);
 #else
-       load_sp0(task_top_of_stack(task));
+       if (static_cpu_has(X86_FEATURE_XENPV))
+               load_sp0(task_top_of_stack(task));
 #endif
 }
 
index 70f425947dc50f3e99ca639c0ead0d7e1cce636d..00223333821a96616647a9cbb6fe729c4a18b7b6 100644 (file)
@@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 #else /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_64
-# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
+# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
 #endif
 
 #endif
index 877b5c1a1b1247116e20e7272dbade77e1874fc4..d33e4a26dc7ed2db51687ddb4ca7ee3a35ddf835 100644 (file)
 #include <asm/cpufeature.h>
 #include <asm/special_insns.h>
 #include <asm/smp.h>
+#include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
 
-static inline void __invpcid(unsigned long pcid, unsigned long addr,
-                            unsigned long type)
-{
-       struct { u64 d[2]; } desc = { { pcid, addr } };
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and we flush and restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
+ *         the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ *         the value we write into the PCID part of CR3; corresponds to the
+ *         ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ *         for KPTI each mm has two address spaces and thus needs two
+ *         PCID values, but we can still make do with a single ASID
+ *         denomination for each mm.  Corresponds to kPCID + 2048.
+ */
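
To make the three numberings concrete, here is a standalone sketch of the mapping, assuming the user/kernel switch bit sits at PCID bit 11 (1 << 11 == 2048, as the uPCID range above implies):

#include <stdio.h>
#include <stdint.h>

#define TLB_NR_DYN_ASIDS   6
#define PTI_USER_PCID_BIT  11           /* assumed bit position */

static uint16_t kern_pcid(uint16_t asid) { return asid + 1; }

static uint16_t user_pcid(uint16_t asid)
{
        return kern_pcid(asid) | (1u << PTI_USER_PCID_BIT);
}

int main(void)
{
        uint16_t asid;

        /* prints: ASID 0 -> kPCID 1, uPCID 2049; and so on */
        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++)
                printf("ASID %u -> kPCID %u, uPCID %u\n",
                       asid, kern_pcid(asid), user_pcid(asid));
        return 0;
}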
 
-       /*
-        * The memory clobber is because the whole point is to invalidate
-        * stale TLB entries and, especially if we're flushing global
-        * mappings, we don't want the compiler to reorder any subsequent
-        * memory accesses before the TLB flush.
-        *
-        * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
-        * invpcid (%rcx), %rax in long mode.
-        */
-       asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
-                     : : "m" (desc), "a" (type), "c" (&desc) : "memory");
-}
+/* There are 12 bits of space for ASIDS in CR3 */
+#define CR3_HW_ASID_BITS               12
 
-#define INVPCID_TYPE_INDIV_ADDR                0
-#define INVPCID_TYPE_SINGLE_CTXT       1
-#define INVPCID_TYPE_ALL_INCL_GLOBAL   2
-#define INVPCID_TYPE_ALL_NON_GLOBAL    3
+/*
+ * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
+ * user/kernel switches
+ */
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS        1
+#else
+# define PTI_CONSUMED_PCID_BITS        0
+#endif
 
-/* Flush all mappings for a given pcid and addr, not including globals. */
-static inline void invpcid_flush_one(unsigned long pcid,
-                                    unsigned long addr)
-{
-       __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
-}
+#define CR3_AVAIL_PCID_BITS (X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
+/*
+ * ASIDs are zero-based: 0..MAX_ASID_AVAILABLE are valid.  -1 below to account
+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
+ * use by non-PCID-aware users.
+ */
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
+
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS       6
 
-/* Flush all mappings for a given PCID, not including globals. */
-static inline void invpcid_flush_single_context(unsigned long pcid)
+/*
+ * Given @asid, compute kPCID
+ */
+static inline u16 kern_pcid(u16 asid)
 {
-       __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       /*
+        * Make sure that the dynamic ASID space does not conflict with the
+        * bit we are using to switch between user and kernel ASIDs.
+        */
+       BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_PCID_USER_BIT));
+
+       /*
+        * The ASID being passed in here should have respected the
+        * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+        */
+       VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_PCID_USER_BIT));
+#endif
+       /*
+        * The dynamically-assigned ASIDs that get passed in are small
+        * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
+        * so do not bother to clear it.
+        *
+        * If PCID is on, ASID-aware code paths put the ASID+1 into the
+        * PCID bits.  This serves two purposes.  It prevents a nasty
+        * situation in which PCID-unaware code saves CR3, loads some other
+        * value (with PCID == 0), and then restores CR3, thus corrupting
+        * the TLB for ASID 0 if the saved ASID was nonzero.  It also means
+        * that any bugs involving loading a PCID-enabled CR3 with
+        * CR4.PCIDE off will trigger deterministically.
+        */
+       return asid + 1;
 }
 
-/* Flush all mappings, including globals, for all PCIDs. */
-static inline void invpcid_flush_all(void)
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
+       u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       ret |= 1 << X86_CR3_PTI_PCID_USER_BIT;
+#endif
+       return ret;
 }
 
-/* Flush all mappings for all PCIDs except globals. */
-static inline void invpcid_flush_all_nonglobals(void)
+struct pgd_t;
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
-       __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
+       if (static_cpu_has(X86_FEATURE_PCID)) {
+               return __sme_pa(pgd) | kern_pcid(asid);
+       } else {
+               VM_WARN_ON_ONCE(asid != 0);
+               return __sme_pa(pgd);
+       }
 }
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 {
-       u64 new_tlb_gen;
-
-       /*
-        * Bump the generation count.  This also serves as a full barrier
-        * that synchronizes with switch_mm(): callers are required to order
-        * their read of mm_cpumask after their writes to the paging
-        * structures.
-        */
-       smp_mb__before_atomic();
-       new_tlb_gen = atomic64_inc_return(&mm->context.tlb_gen);
-       smp_mb__after_atomic();
-
-       return new_tlb_gen;
+       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+       VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID));
+       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
 }
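
A toy model of the CR3 values these helpers produce, using a hypothetical PGD physical address (the C-bit that __sme_pa() may fold in under SME is ignored here):

#include <stdio.h>
#include <stdint.h>

#define CR3_NOFLUSH (1ULL << 63)        /* X86_CR3_PCID_NOFLUSH */

int main(void)
{
        uint64_t pgd_pa = 0x12345000ULL;        /* hypothetical, page aligned */
        uint16_t kpcid  = 3;                    /* kern_pcid(asid == 2) */

        /* 0x12345003 and 0x8000000012345003 */
        printf("%#llx\n", (unsigned long long)(pgd_pa | kpcid));
        printf("%#llx\n", (unsigned long long)(pgd_pa | kpcid | CR3_NOFLUSH));
        return 0;
}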
 
 #ifdef CONFIG_PARAVIRT
@@ -99,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
        return !static_cpu_has(X86_FEATURE_PCID);
 }
 
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS 6
-
 struct tlb_context {
        u64 ctx_id;
        u64 tlb_gen;
@@ -138,6 +192,24 @@ struct tlb_state {
         */
        bool is_lazy;
 
+       /*
+        * If set we changed the page tables in such a way that we
+        * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+        * This tells us to go invalidate all the non-loaded ctxs[]
+        * on the next context switch.
+        *
+        * The current ctx was kept up-to-date as it ran and does not
+        * need to be invalidated.
+        */
+       bool invalidate_other;
+
+       /*
+        * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+        * the corresponding user PCID needs a flush next time we
+        * switch to it; see SWITCH_TO_USER_CR3.
+        */
+       unsigned short user_pcid_flush_mask;
+
        /*
         * Access to this CR4 shadow and to H/W CR4 is protected by
         * disabling interrupts when modifying either one.
@@ -218,6 +290,14 @@ static inline unsigned long cr4_read_shadow(void)
        return this_cpu_read(cpu_tlbstate.cr4);
 }
 
+/*
+ * Mark all other ASIDs as invalid, preserving the current one.
+ */
+static inline void invalidate_other_asid(void)
+{
+       this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
 /*
  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
  * enable and PPro Global page enable), so that any CPU's that boot
@@ -237,37 +317,63 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
-static inline void __native_flush_tlb(void)
+/*
+ * Given an ASID, flush the corresponding user ASID.  We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
 {
+       /* There is no user ASID if address space separation is off */
+       if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+               return;
+
        /*
-        * If current->mm == NULL then we borrow a mm which may change during a
-        * task switch and therefore we must not be preempted while we write CR3
-        * back:
+        * We only have a single ASID if PCID is off and the CR3
+        * write will have flushed it.
         */
-       preempt_disable();
-       native_write_cr3(__native_read_cr3());
-       preempt_enable();
+       if (!cpu_feature_enabled(X86_FEATURE_PCID))
+               return;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       __set_bit(kern_pcid(asid),
+                 (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
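
The bookkeeping above boils down to a small per-CPU bitmask keyed by kPCID; a standalone toy model of the deferred-flush protocol (not the kernel API):

#include <stdint.h>

static uint16_t user_pcid_flush_mask;   /* per-CPU in the real kernel */

static void mark_user_asid_stale(uint16_t asid)
{
        user_pcid_flush_mask |= 1u << (asid + 1);  /* bit index kern_pcid(asid) */
}

/* tested and cleared by the entry code's SWITCH_TO_USER_CR3 */
static int user_asid_needs_flush(uint16_t asid)
{
        return user_pcid_flush_mask & (1u << (asid + 1));
}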
 
-static inline void __native_flush_tlb_global_irq_disabled(void)
+/*
+ * flush the entire current user mapping
+ */
+static inline void __native_flush_tlb(void)
 {
-       unsigned long cr4;
+       /*
+        * Preemption or interrupts must be disabled to protect the access
+        * to the per CPU variable and to prevent being preempted between
+        * read_cr3() and write_cr3().
+        */
+       WARN_ON_ONCE(preemptible());
 
-       cr4 = this_cpu_read(cpu_tlbstate.cr4);
-       /* clear PGE */
-       native_write_cr4(cr4 & ~X86_CR4_PGE);
-       /* write old PGE again and flush TLBs */
-       native_write_cr4(cr4);
+       invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+       /* If current->mm == NULL then the read_cr3() "borrows" an mm */
+       native_write_cr3(__native_read_cr3());
 }
 
+/*
+ * flush everything
+ */
 static inline void __native_flush_tlb_global(void)
 {
-       unsigned long flags;
+       unsigned long cr4, flags;
 
        if (static_cpu_has(X86_FEATURE_INVPCID)) {
                /*
                 * Using INVPCID is considerably faster than a pair of writes
                 * to CR4 sandwiched inside an IRQ flag save/restore.
+                *
+                * Note, this works with CR4.PCIDE=0 or 1.
                 */
                invpcid_flush_all();
                return;
@@ -280,36 +386,69 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       __native_flush_tlb_global_irq_disabled();
+       cr4 = this_cpu_read(cpu_tlbstate.cr4);
+       /* toggle PGE */
+       native_write_cr4(cr4 ^ X86_CR4_PGE);
+       /* write old PGE again and flush TLBs */
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
 
+/*
+ * flush one page in the user mapping
+ */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+       u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       /*
+        * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+        * Just use invalidate_user_asid() in case we are called early.
+        */
+       if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+               invalidate_user_asid(loaded_mm_asid);
+       else
+               invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
+/*
+ * flush everything
+ */
 static inline void __flush_tlb_all(void)
 {
-       if (boot_cpu_has(X86_FEATURE_PGE))
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
                __flush_tlb_global();
-       else
+       } else {
+               /*
+                * !PGE -> !PCID (setup_pcid()), thus every flush is total.
+                */
                __flush_tlb();
-
-       /*
-        * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-        * we'd end up flushing kernel translations for the current ASID but
-        * we might fail to flush kernel translations for other cached ASIDs.
-        *
-        * To avoid this issue, we force PCID off if PGE is off.
-        */
+       }
 }
 
+/*
+ * flush one page in the kernel mapping
+ */
 static inline void __flush_tlb_one(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
        __flush_tlb_single(addr);
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       /*
+        * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+        * but since kernel space is replicated across all address spaces, we
+        * must also invalidate all of the other ASIDs.
+        */
+       invalidate_other_asid();
 }
 
 #define TLB_FLUSH_ALL  -1UL
@@ -370,6 +509,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
                             const struct flush_tlb_info *info);
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+       /*
+        * Bump the generation count.  This also serves as a full barrier
+        * that synchronizes with switch_mm(): callers are required to order
+        * their read of mm_cpumask after their writes to the paging
+        * structures.
+        */
+       return atomic64_inc_return(&mm->context.tlb_gen);
+}
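
The dropped smp_mb__{before,after}_atomic() pair was redundant because a value-returning atomic RMW already acts as a full barrier; in C11 terms the equivalent is a seq_cst fetch-add (a sketch, not the kernel primitive):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t tlb_gen;

static uint64_t inc_tlb_gen(void)
{
        /* seq_cst RMW: fully ordered, like atomic64_inc_return() */
        return atomic_fetch_add_explicit(&tlb_gen, 1,
                                         memory_order_seq_cst) + 1;
}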
+
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
                                        struct mm_struct *mm)
 {
index 84b9ec0c1bc0867795c3a2d327f3b8831bf4f8ba..22647a642e98c41508dab9976a1ff4665b97a4d2 100644 (file)
@@ -283,34 +283,34 @@ TRACE_EVENT(vector_alloc_managed,
 DECLARE_EVENT_CLASS(vector_activate,
 
        TP_PROTO(unsigned int irq, bool is_managed, bool can_reserve,
-                bool early),
+                bool reserve),
 
-       TP_ARGS(irq, is_managed, can_reserve, early),
+       TP_ARGS(irq, is_managed, can_reserve, reserve),
 
        TP_STRUCT__entry(
                __field(        unsigned int,   irq             )
                __field(        bool,           is_managed      )
                __field(        bool,           can_reserve     )
-               __field(        bool,           early           )
+               __field(        bool,           reserve         )
        ),
 
        TP_fast_assign(
                __entry->irq            = irq;
                __entry->is_managed     = is_managed;
                __entry->can_reserve    = can_reserve;
-               __entry->early          = early;
+               __entry->reserve        = reserve;
        ),
 
-       TP_printk("irq=%u is_managed=%d can_reserve=%d early=%d",
+       TP_printk("irq=%u is_managed=%d can_reserve=%d reserve=%d",
                  __entry->irq, __entry->is_managed, __entry->can_reserve,
-                 __entry->early)
+                 __entry->reserve)
 );
 
 #define DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(name)                         \
 DEFINE_EVENT_FN(vector_activate, name,                                 \
        TP_PROTO(unsigned int irq, bool is_managed,                     \
-                bool can_reserve, bool early),                         \
-       TP_ARGS(irq, is_managed, can_reserve, early), NULL, NULL);      \
+                bool can_reserve, bool reserve),                       \
+       TP_ARGS(irq, is_managed, can_reserve, reserve), NULL, NULL);    \
 
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_activate);
 DEFINE_IRQ_VECTOR_ACTIVATE_EVENT(vector_deactivate);
index 1fadd310ff680ece697fa65a8db410c380a8547e..31051f35cbb768e452c4f76a60c5415a45f572e7 100644 (file)
@@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
 dotraplinkage void do_stack_segment(struct pt_regs *, long);
 #ifdef CONFIG_X86_64
 dotraplinkage void do_double_fault(struct pt_regs *, long);
-asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
 #endif
 dotraplinkage void do_general_protection(struct pt_regs *, long);
 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
index e9cc6fe1fc6f953c38ddcc61fcf06fd90d72ab04..1f86e1b0a5cdc1afeb4667e8c770ec81456e9fb4 100644 (file)
@@ -7,6 +7,9 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
+#define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
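
On x86_64, struct pt_regs ends with the five-word hardware iret frame (ip, cs, flags, sp, ss), so IRET_FRAME_SIZE works out to 5 * 8 = 40 bytes. A sketch of just that tail:

#include <stdint.h>

/* the tail of x86_64 struct pt_regs: the hardware iret frame */
struct iret_frame {
        uint64_t ip;
        uint64_t cs;
        uint64_t flags;
        uint64_t sp;
        uint64_t ss;
};

/* sizeof(struct iret_frame) == 40 == IRET_FRAME_SIZE */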
+
 struct unwind_state {
        struct stack_info stack_info;
        unsigned long stack_mask;
@@ -52,15 +55,28 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 }
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+/*
+ * If 'partial' returns true, only the iret frame registers are valid.
+ */
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+                                                   bool *partial)
 {
        if (unwind_done(state))
                return NULL;
 
+       if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+               *partial = !state->full_regs;
+#else
+               *partial = false;
+#endif
+       }
+
        return state->regs;
 }
 #else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+                                                   bool *partial)
 {
        return NULL;
 }
index d9a7c659009c94b2ee04b4ddcc94e5874cc403b0..b986b2ca688a0e4fa24ace613c7e465e909723fe 100644 (file)
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
index 7cb282e9e58777aeb2ffd86b344d39a5189c5f84..bfd882617613923f0db79c4aae82690bda7a8d4f 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/smap.h>
+#include <asm/nospec-branch.h>
 
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
        __HYPERCALL_5ARG(a1, a2, a3, a4, a5);
 
        stac();
-       asm volatile("call *%[call]"
+       asm volatile(CALL_NOSPEC
                     : __HYPERCALL_5PARAM
-                    : [call] "a" (&hypercall_page[call])
+                    : [thunk_target] "a" (&hypercall_page[call])
                     : __HYPERCALL_CLOBBER5);
        clac();
 
index 7e1e730396ae08f5a267adaccf0c3ba46448f780..bcba3c643e63dced1c873ee5e1cdbfdd5d307928 100644 (file)
 #define X86_CR3_PWT            _BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT                4 /* Page Cache Disable */
 #define X86_CR3_PCD            _BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK      _AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS      12
+#define X86_CR3_PCID_MASK      (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH    _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
 /*
  * Intel CPU features in CR4
index dbaf14d69ebd510b86a4a0e9e83bbd1862d99d14..4817d743c26359c697559053a23518a5e25b241b 100644 (file)
@@ -344,9 +344,12 @@ done:
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
        unsigned long flags;
+       int i;
 
-       if (instr[0] != 0x90)
-               return;
+       for (i = 0; i < a->padlen; i++) {
+               if (instr[i] != 0x90)
+                       return;
+       }
 
        local_irq_save(flags);
        add_nops(instr + (a->instrlen - a->padlen), a->padlen);
index 6e272f3ea984a220975d1b7c144867840fb867a9..880441f2414610298002c34652b96e571772e08f 100644 (file)
@@ -2626,11 +2626,13 @@ static int __init apic_set_verbosity(char *arg)
                apic_verbosity = APIC_DEBUG;
        else if (strcmp("verbose", arg) == 0)
                apic_verbosity = APIC_VERBOSE;
+#ifdef CONFIG_X86_64
        else {
                pr_warning("APIC Verbosity level %s not recognised"
                        " use apic=verbose or apic=debug\n", arg);
                return -EINVAL;
        }
+#endif
 
        return 0;
 }
index aa85690e9b6416b797db8129c8f1cbd7606bf687..25a87028cb3fe9bb4a6e9eba757d50cd49f15620 100644 (file)
@@ -151,7 +151,7 @@ static struct apic apic_flat __ro_after_init = {
        .apic_id_valid                  = default_apic_id_valid,
        .apic_id_registered             = flat_apic_id_registered,
 
-       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_delivery_mode              = dest_Fixed,
        .irq_dest_mode                  = 1, /* logical */
 
        .disable_esr                    = 0,
index 7b659c4480c91a680466cc961cee68754c633ada..5078b5ce63a7aaf4569368e6447c9a4423271109 100644 (file)
@@ -110,7 +110,7 @@ struct apic apic_noop __ro_after_init = {
        .apic_id_valid                  = default_apic_id_valid,
        .apic_id_registered             = noop_apic_id_registered,
 
-       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_delivery_mode              = dest_Fixed,
        /* logical delivery broadcast to all CPUs: */
        .irq_dest_mode                  = 1,
 
index 201579dc52428edb5c3989102a432c14799d1e1f..8a79634214600ab02076cfb601dac780996b6f3f 100644 (file)
@@ -2988,7 +2988,7 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 }
 
 int mp_irqdomain_activate(struct irq_domain *domain,
-                         struct irq_data *irq_data, bool early)
+                         struct irq_data *irq_data, bool reserve)
 {
        unsigned long flags;
 
index 9b18be76442236eab971cab86cbf366f9e1e134f..ce503c99f5c4cd67fddb5fafdda7e3e64b8e7690 100644 (file)
@@ -39,17 +39,13 @@ static void irq_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
                ((apic->irq_dest_mode == 0) ?
                        MSI_ADDR_DEST_MODE_PHYSICAL :
                        MSI_ADDR_DEST_MODE_LOGICAL) |
-               ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                       MSI_ADDR_REDIRECTION_CPU :
-                       MSI_ADDR_REDIRECTION_LOWPRI) |
+               MSI_ADDR_REDIRECTION_CPU |
                MSI_ADDR_DEST_ID(cfg->dest_apicid);
 
        msg->data =
                MSI_DATA_TRIGGER_EDGE |
                MSI_DATA_LEVEL_ASSERT |
-               ((apic->irq_delivery_mode != dest_LowestPrio) ?
-                       MSI_DATA_DELIVERY_FIXED :
-                       MSI_DATA_DELIVERY_LOWPRI) |
+               MSI_DATA_DELIVERY_FIXED |
                MSI_DATA_VECTOR(cfg->vector);
 }
 
index fa22017de806504b79593ead951bffe4e5d5421b..02e8acb134f856faa0030bf1c007dc3ac1bd9ca3 100644 (file)
@@ -105,7 +105,7 @@ static struct apic apic_default __ro_after_init = {
        .apic_id_valid                  = default_apic_id_valid,
        .apic_id_registered             = default_apic_id_registered,
 
-       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_delivery_mode              = dest_Fixed,
        /* logical delivery broadcast to all CPUs: */
        .irq_dest_mode                  = 1,
 
index 750449152b04b4feed3c98ddc772db91f003f818..f8b03bb8e72560a436dbad677ddeed30f19531cf 100644 (file)
@@ -184,6 +184,7 @@ static void reserve_irq_vector_locked(struct irq_data *irqd)
        irq_matrix_reserve(vector_matrix);
        apicd->can_reserve = true;
        apicd->has_reserved = true;
+       irqd_set_can_reserve(irqd);
        trace_vector_reserve(irqd->irq, 0);
        vector_assign_managed_shutdown(irqd);
 }
@@ -368,8 +369,18 @@ static int activate_reserved(struct irq_data *irqd)
        int ret;
 
        ret = assign_irq_vector_any_locked(irqd);
-       if (!ret)
+       if (!ret) {
                apicd->has_reserved = false;
+               /*
+                * Core might have disabled reservation mode after
+                * allocating the irq descriptor. Ideally this should
+                * happen before allocation time, but that would require
+                * completely convoluted ways of transporting that
+                * information.
+                */
+               if (!irqd_can_reserve(irqd))
+                       apicd->can_reserve = false;
+       }
        return ret;
 }
 
@@ -398,21 +409,21 @@ static int activate_managed(struct irq_data *irqd)
 }
 
 static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd,
-                              bool early)
+                              bool reserve)
 {
        struct apic_chip_data *apicd = apic_chip_data(irqd);
        unsigned long flags;
        int ret = 0;
 
        trace_vector_activate(irqd->irq, apicd->is_managed,
-                             apicd->can_reserve, early);
+                             apicd->can_reserve, reserve);
 
        /* Nothing to do for fixed assigned vectors */
        if (!apicd->can_reserve && !apicd->is_managed)
                return 0;
 
        raw_spin_lock_irqsave(&vector_lock, flags);
-       if (early || irqd_is_managed_and_shutdown(irqd))
+       if (reserve || irqd_is_managed_and_shutdown(irqd))
                vector_assign_managed_shutdown(irqd);
        else if (apicd->is_managed)
                ret = activate_managed(irqd);
@@ -478,6 +489,7 @@ static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd,
        } else {
                /* Release the vector */
                apicd->can_reserve = true;
+               irqd_set_can_reserve(irqd);
                clear_irq_vector(irqd);
                realloc = true;
        }
index 622f13ca8a943c270e70df6100561ce4c56d7db2..8b04234e010b2616e42bff6cbeabdc3c2087c13c 100644 (file)
@@ -184,7 +184,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
        .apic_id_valid                  = x2apic_apic_id_valid,
        .apic_id_registered             = x2apic_apic_id_registered,
 
-       .irq_delivery_mode              = dest_LowestPrio,
+       .irq_delivery_mode              = dest_Fixed,
        .irq_dest_mode                  = 1, /* logical */
 
        .disable_esr                    = 0,
index 8ea78275480dafeb702e11ba73364cd9e7c52f21..76417a9aab73c3f7e3376261eb9ec592b16adf3b 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/sigframe.h>
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -93,4 +94,13 @@ void common(void) {
 
        BLANK();
        DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
+
+       /* TLB state for the entry code */
+       OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
+       /* Layout info for cpu_entry_area */
+       OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
+       OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
+       OFFSET(CPU_ENTRY_AREA_entry_stack, cpu_entry_area, entry_stack_page);
+       DEFINE(SIZEOF_entry_stack, sizeof(struct entry_stack));
 }
index dedf428b20b68b0a4748fc1ac3032193c9121362..fa1261eefa16e73cedf27aadb878753be693f919 100644 (file)
@@ -47,13 +47,8 @@ void foo(void)
        BLANK();
 
        /* Offset from the sysenter stack to tss.sp0 */
-       DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-              offsetofend(struct tss_struct, SYSENTER_stack));
-
-       /* Offset from cpu_tss to SYSENTER_stack */
-       OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
-       /* Size of SYSENTER_stack */
-       DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
+       DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
+              offsetofend(struct cpu_entry_area, entry_stack_page.stack));
 
 #ifdef CONFIG_CC_STACKPROTECTOR
        BLANK();
index 630212fa9b9da3f0498fc30d4c193c5926c43abb..bf51e51d808dd8914abd3b4bca69b37ce3ec023b 100644 (file)
@@ -23,6 +23,9 @@ int main(void)
 #ifdef CONFIG_PARAVIRT
        OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
        OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
+#ifdef CONFIG_DEBUG_ENTRY
+       OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
+#endif
        BLANK();
 #endif
 
@@ -63,6 +66,7 @@ int main(void)
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
+       OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
        BLANK();
 
 #ifdef CONFIG_CC_STACKPROTECTOR
index bcb75dc97d44075d2eecb3137b91f934072352b0..ea831c85819583c3dd0b0a48c2a31518eaa67b44 100644 (file)
@@ -829,8 +829,32 @@ static void init_amd(struct cpuinfo_x86 *c)
                set_cpu_cap(c, X86_FEATURE_K8);
 
        if (cpu_has(c, X86_FEATURE_XMM2)) {
-               /* MFENCE stops RDTSC speculation */
-               set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               unsigned long long val;
+               int ret;
+
+               /*
+                * A serializing LFENCE has less overhead than MFENCE, so
+                * use it for execution serialization.  On families which
+                * don't have that MSR, LFENCE is already serializing.
+                * msr_set_bit() uses the safe accessors, too, even if the MSR
+                * is not present.
+                */
+               msr_set_bit(MSR_F10H_DECFG,
+                           MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);
+
+               /*
+                * Verify that the MSR write was successful (could be running
+                * under a hypervisor) and only then assume that LFENCE is
+                * serializing.
+                */
+               ret = rdmsrl_safe(MSR_F10H_DECFG, &val);
+               if (!ret && (val & MSR_F10H_DECFG_LFENCE_SERIALIZE)) {
+                       /* A serializing LFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+               } else {
+                       /* MFENCE stops RDTSC speculation */
+                       set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
+               }
        }
 
        /*
index ba0b2424c9b050100ac6eeef97c4aef45181d455..e4dc26185aa70bcdecf8ca932b6c720121819558 100644 (file)
  */
 #include <linux/init.h>
 #include <linux/utsname.h>
+#include <linux/cpu.h>
+
+#include <asm/nospec-branch.h>
+#include <asm/cmdline.h>
 #include <asm/bugs.h>
 #include <asm/processor.h>
 #include <asm/processor-flags.h>
@@ -20,6 +24,8 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 
+static void __init spectre_v2_select_mitigation(void);
+
 void __init check_bugs(void)
 {
        identify_boot_cpu();
@@ -29,6 +35,9 @@ void __init check_bugs(void)
                print_cpu_info(&boot_cpu_data);
        }
 
+       /* Select the proper spectre mitigation before patching alternatives */
+       spectre_v2_select_mitigation();
+
 #ifdef CONFIG_X86_32
        /*
         * Check whether we are able to run this kernel safely on SMP.
@@ -60,3 +69,179 @@ void __init check_bugs(void)
                set_memory_4k((unsigned long)__va(0), 1);
 #endif
 }
+
+/* The kernel command line selection */
+enum spectre_v2_mitigation_cmd {
+       SPECTRE_V2_CMD_NONE,
+       SPECTRE_V2_CMD_AUTO,
+       SPECTRE_V2_CMD_FORCE,
+       SPECTRE_V2_CMD_RETPOLINE,
+       SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+       SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+       [SPECTRE_V2_NONE]                       = "Vulnerable",
+       [SPECTRE_V2_RETPOLINE_MINIMAL]          = "Vulnerable: Minimal generic ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_MINIMAL_AMD]      = "Vulnerable: Minimal AMD ASM retpoline",
+       [SPECTRE_V2_RETPOLINE_GENERIC]          = "Mitigation: Full generic retpoline",
+       [SPECTRE_V2_RETPOLINE_AMD]              = "Mitigation: Full AMD retpoline",
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 mitigation: " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+
+static void __init spec2_print_if_insecure(const char *reason)
+{
+       if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               pr_info("%s\n", reason);
+}
+
+static inline bool retp_compiler(void)
+{
+       return __is_defined(RETPOLINE);
+}
+
+static inline bool match_option(const char *arg, int arglen, const char *opt)
+{
+       int len = strlen(opt);
+
+       return len == arglen && !strncmp(arg, opt, len);
+}
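
The exact-length comparison is what keeps "retpoline" from matching as a prefix of "retpoline,amd"; a standalone demo:

#include <stdio.h>
#include <string.h>

static int match_option(const char *arg, int arglen, const char *opt)
{
        int len = strlen(opt);

        return len == arglen && !strncmp(arg, opt, len);
}

int main(void)
{
        const char *arg = "retpoline,amd";      /* as read from spectre_v2= */

        printf("%d\n", match_option(arg, 13, "retpoline"));     /* 0 */
        printf("%d\n", match_option(arg, 13, "retpoline,amd")); /* 1 */
        return 0;
}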
+
+static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
+{
+       char arg[20];
+       int ret;
+
+       ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
+                                 sizeof(arg));
+       if (ret > 0) {
+               if (match_option(arg, ret, "off")) {
+                       goto disable;
+               } else if (match_option(arg, ret, "on")) {
+                       spec2_print_if_secure("force enabled on command line.");
+                       return SPECTRE_V2_CMD_FORCE;
+               } else if (match_option(arg, ret, "retpoline")) {
+                       spec2_print_if_insecure("retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE;
+               } else if (match_option(arg, ret, "retpoline,amd")) {
+                       if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+                               pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n");
+                               return SPECTRE_V2_CMD_AUTO;
+                       }
+                       spec2_print_if_insecure("AMD retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_AMD;
+               } else if (match_option(arg, ret, "retpoline,generic")) {
+                       spec2_print_if_insecure("generic retpoline selected on command line.");
+                       return SPECTRE_V2_CMD_RETPOLINE_GENERIC;
+               } else if (match_option(arg, ret, "auto")) {
+                       return SPECTRE_V2_CMD_AUTO;
+               }
+       }
+
+       if (!cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+               return SPECTRE_V2_CMD_AUTO;
+disable:
+       spec2_print_if_insecure("disabled on command line.");
+       return SPECTRE_V2_CMD_NONE;
+}
+
+static void __init spectre_v2_select_mitigation(void)
+{
+       enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
+       enum spectre_v2_mitigation mode = SPECTRE_V2_NONE;
+
+       /*
+        * If the CPU is not affected and the command line mode is NONE or AUTO
+        * then nothing to do.
+        */
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2) &&
+           (cmd == SPECTRE_V2_CMD_NONE || cmd == SPECTRE_V2_CMD_AUTO))
+               return;
+
+       switch (cmd) {
+       case SPECTRE_V2_CMD_NONE:
+               return;
+
+       case SPECTRE_V2_CMD_FORCE:
+               /* FALLTHRU */
+       case SPECTRE_V2_CMD_AUTO:
+               goto retpoline_auto;
+
+       case SPECTRE_V2_CMD_RETPOLINE_AMD:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_amd;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE_GENERIC:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_generic;
+               break;
+       case SPECTRE_V2_CMD_RETPOLINE:
+               if (IS_ENABLED(CONFIG_RETPOLINE))
+                       goto retpoline_auto;
+               break;
+       }
+       pr_err("kernel not compiled with retpoline; no mitigation available!\n");
+       return;
+
+retpoline_auto:
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+       retpoline_amd:
+               if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
+                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       goto retpoline_generic;
+               }
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       } else {
+       retpoline_generic:
+               mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
+                                        SPECTRE_V2_RETPOLINE_MINIMAL;
+               setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+       }
+
+       spectre_v2_enabled = mode;
+       pr_info("%s\n", spectre_v2_strings[mode]);
+}
+
+#undef pr_fmt
+
+#ifdef CONFIG_SYSFS
+ssize_t cpu_show_meltdown(struct device *dev,
+                         struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               return sprintf(buf, "Not affected\n");
+       if (boot_cpu_has(X86_FEATURE_PTI))
+               return sprintf(buf, "Mitigation: PTI\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
+               return sprintf(buf, "Not affected\n");
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+               return sprintf(buf, "Not affected\n");
+
+       return sprintf(buf, "%s\n", spectre_v2_strings[spectre_v2_enabled]);
+}
+#endif
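
These handlers back the files under /sys/devices/system/cpu/vulnerabilities/; a minimal userspace reader (the exact strings depend on the running kernel):

#include <stdio.h>

int main(void)
{
        char buf[128];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/meltdown",
                        "r");

        if (f && fgets(buf, sizeof(buf), f))
                fputs(buf, stdout);     /* e.g. "Mitigation: PTI" */
        if (f)
                fclose(f);
        return 0;
}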
index fa998ca8aa5aa5b4899dbe8a57c5b543f927009e..ef29ad001991d6acdd5b59cd707d85f9ff99f9ea 100644 (file)
@@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
        return NULL;            /* Not found */
 }
 
-__u32 cpu_caps_cleared[NCAPINTS];
-__u32 cpu_caps_set[NCAPINTS];
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
 
 void load_percpu_segment(int cpu)
 {
@@ -490,28 +490,23 @@ void load_percpu_segment(int cpu)
        load_stack_canary_segment();
 }
 
-/* Setup the fixmap mapping only once per-processor */
-static inline void setup_fixmap_gdt(int cpu)
-{
-#ifdef CONFIG_X86_64
-       /* On 64-bit systems, we use a read-only fixmap GDT. */
-       pgprot_t prot = PAGE_KERNEL_RO;
-#else
-       /*
-        * On native 32-bit systems, the GDT cannot be read-only because
-        * our double fault handler uses a task gate, and entering through
-        * a task gate needs to change an available TSS to busy.  If the GDT
-        * is read-only, that will triple fault.
-        *
-        * On Xen PV, the GDT must be read-only because the hypervisor requires
-        * it.
-        */
-       pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-               PAGE_KERNEL_RO : PAGE_KERNEL;
+#ifdef CONFIG_X86_32
+/* The 32-bit entry code needs to find cpu_entry_area. */
+DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 #endif
 
-       __set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
-}
+#ifdef CONFIG_X86_64
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
+         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
+};
+#endif
 
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
@@ -747,7 +742,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
 {
        int i;
 
-       for (i = 0; i < NCAPINTS; i++) {
+       for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
                c->x86_capability[i] &= ~cpu_caps_cleared[i];
                c->x86_capability[i] |= cpu_caps_set[i];
        }
@@ -927,6 +922,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
        }
 
        setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+       if (c->x86_vendor != X86_VENDOR_AMD)
+               setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+       setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
        fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1250,7 +1252,7 @@ void enable_sep_cpu(void)
                return;
 
        cpu = get_cpu();
-       tss = &per_cpu(cpu_tss, cpu);
+       tss = &per_cpu(cpu_tss_rw, cpu);
 
        /*
         * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
@@ -1259,11 +1261,7 @@ void enable_sep_cpu(void)
 
        tss->x86_tss.ss1 = __KERNEL_CS;
        wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
-
-       wrmsr(MSR_IA32_SYSENTER_ESP,
-             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
-             0);
-
+       wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1), 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
 
        put_cpu();
@@ -1357,25 +1355,22 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
-         [0 ... N_EXCEPTION_STACKS - 1]        = EXCEPTION_STKSZ,
-         [DEBUG_STACK - 1]                     = DEBUG_STKSZ
-};
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
+       extern char _entry_trampoline[];
+       extern char entry_SYSCALL_64_trampoline[];
+
+       int cpu = smp_processor_id();
+       unsigned long SYSCALL64_entry_trampoline =
+               (unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
+               (entry_SYSCALL_64_trampoline - _entry_trampoline);
+
        wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
+       if (static_cpu_has(X86_FEATURE_PTI))
+               wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+       else
+               wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
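
The trampoline LSTAR value carries the symbol's offset within the trampoline text over to its cpu_entry_area alias; with purely hypothetical addresses:

/*
 * Hypothetical example of the fixup above:
 *   _entry_trampoline           == 0xffffffff82000000  (kernel image)
 *   entry_SYSCALL_64_trampoline == 0xffffffff82000040  (0x40 bytes in)
 *   area->entry_trampoline      == 0xfffffe0000002000  (per-CPU alias)
 * so SYSCALL64_entry_trampoline == 0xfffffe0000002000 + 0x40.
 */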
 
 #ifdef CONFIG_IA32_EMULATION
        wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -1386,7 +1381,7 @@ void syscall_init(void)
         * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
         */
        wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_entry_stack(cpu) + 1));
        wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
 #else
        wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
@@ -1530,7 +1525,7 @@ void cpu_init(void)
        if (cpu)
                load_ucode_ap();
 
-       t = &per_cpu(cpu_tss, cpu);
+       t = &per_cpu(cpu_tss_rw, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1569,7 +1564,7 @@ void cpu_init(void)
         * set up and load the per-CPU TSS
         */
        if (!oist->ist[0]) {
-               char *estacks = per_cpu(exception_stacks, cpu);
+               char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
 
                for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                        estacks += exception_stack_sizes[v];
@@ -1580,7 +1575,7 @@ void cpu_init(void)
                }
        }
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
        /*
         * <= is required because the CPU will access up to
@@ -1596,11 +1591,12 @@ void cpu_init(void)
        enter_lazy_tlb(&init_mm, me);
 
        /*
-        * Initialize the TSS.  Don't bother initializing sp0, as the initial
-        * task never enters user mode.
+        * Initialize the TSS.  sp0 points to the entry trampoline stack
+        * regardless of what task is running.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
+       load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
 
        load_mm_ldt(&init_mm);
 
@@ -1612,7 +1608,6 @@ void cpu_init(void)
        if (is_uv_system())
                uv_cpu_init();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 
@@ -1622,7 +1617,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
 
        wait_for_master_cpu(cpu);
 
@@ -1657,12 +1652,12 @@ void cpu_init(void)
         * Initialize the TSS.  Don't bother initializing sp0, as the initial
         * task never enters user mode.
         */
-       set_tss_desc(cpu, t);
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
        load_TR_desc();
 
        load_mm_ldt(&init_mm);
 
-       t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+       t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
 
 #ifdef CONFIG_DOUBLEFAULT
        /* Set up doublefault TSS pointer in the GDT */
@@ -1674,7 +1669,6 @@ void cpu_init(void)
 
        fpu__init_cpu();
 
-       setup_fixmap_gdt(cpu);
        load_fixmap_gdt(cpu);
 }
 #endif
index 7dbcb7adf7975f7f29c38651c23c478ad315a34c..d9e460fc7a3b309327c8f2899cc54a7be2b9dc6b 100644 (file)
@@ -565,15 +565,6 @@ static void print_ucode(struct ucode_cpu_info *uci)
 }
 #else
 
-/*
- * Flush global tlb. We only do this in x86_64 where paging has been enabled
- * already and PGE should be enabled as well.
- */
-static inline void flush_tlb_early(void)
-{
-       __native_flush_tlb_global_irq_disabled();
-}
-
 static inline void print_ucode(struct ucode_cpu_info *uci)
 {
        struct microcode_intel *mc;
@@ -602,10 +593,6 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
        if (rev != mc->hdr.rev)
                return -1;
 
-#ifdef CONFIG_X86_64
-       /* Flush global tlb. This is precaution. */
-       flush_tlb_early();
-#endif
        uci->cpu_sig.rev = rev;
 
        if (early)
@@ -923,8 +910,17 @@ static bool is_blacklisted(unsigned int cpu)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-       if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
-               pr_err_once("late loading on model 79 is disabled.\n");
+       /*
+        * Late loading on model 79 with microcode revision less than 0x0b000021
+        * may result in a system hang. This behavior is documented in item
+        * BDF90, #334165 (Intel Xeon Processor E7-8800/4800 v4 Product Family).
+        */
+       if (c->x86 == 6 &&
+           c->x86_model == INTEL_FAM6_BROADWELL_X &&
+           c->x86_mask == 0x01 &&
+           c->microcode < 0x0b000021) {
+               pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
+               pr_err_once("Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
                return true;
        }
 
index 0e662c55ae902fedd5c78c1ed87a972b35a79856..0b8cedb20d6d92f2875a49292680c8cfecd5b044 100644 (file)
@@ -50,25 +50,23 @@ static void doublefault_fn(void)
                cpu_relax();
 }
 
-struct tss_struct doublefault_tss __cacheline_aligned = {
-       .x86_tss = {
-               .sp0            = STACK_START,
-               .ss0            = __KERNEL_DS,
-               .ldt            = 0,
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
-
-               .ip             = (unsigned long) doublefault_fn,
-               /* 0x2 bit is always set */
-               .flags          = X86_EFLAGS_SF | 0x2,
-               .sp             = STACK_START,
-               .es             = __USER_DS,
-               .cs             = __KERNEL_CS,
-               .ss             = __KERNEL_DS,
-               .ds             = __USER_DS,
-               .fs             = __KERNEL_PERCPU,
-
-               .__cr3          = __pa_nodebug(swapper_pg_dir),
-       }
+struct x86_hw_tss doublefault_tss __cacheline_aligned = {
+       .sp0            = STACK_START,
+       .ss0            = __KERNEL_DS,
+       .ldt            = 0,
+       .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+
+       .ip             = (unsigned long) doublefault_fn,
+       /* 0x2 bit is always set */
+       .flags          = X86_EFLAGS_SF | 0x2,
+       .sp             = STACK_START,
+       .es             = __USER_DS,
+       .cs             = __KERNEL_CS,
+       .ss             = __KERNEL_DS,
+       .ds             = __USER_DS,
+       .fs             = __KERNEL_PERCPU,
+
+       .__cr3          = __pa_nodebug(swapper_pg_dir),
 };
 
 /* dummy for do_double_fault() call */
index f13b4c00a5de4b7a7b36c40d27311672bcc9d05c..afbecff161d162c11a82d0610ae406b910c3f791 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/nmi.h>
 #include <linux/sysfs.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
 
@@ -43,6 +44,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
        return true;
 }
 
+bool in_entry_stack(unsigned long *stack, struct stack_info *info)
+{
+       struct entry_stack *ss = cpu_entry_stack(smp_processor_id());
+
+       void *begin = ss;
+       void *end = ss + 1;
+
+       if ((void *)stack < begin || (void *)stack >= end)
+               return false;
+
+       info->type      = STACK_TYPE_ENTRY;
+       info->begin     = begin;
+       info->end       = end;
+       info->next_sp   = NULL;
+
+       return true;
+}
+
 static void printk_stack_address(unsigned long address, int reliable,
                                 char *log_lvl)
 {
@@ -50,6 +69,39 @@ static void printk_stack_address(unsigned long address, int reliable,
        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
 }
 
+void show_iret_regs(struct pt_regs *regs)
+{
+       printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
+       printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
+               regs->sp, regs->flags);
+}
+
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+                                 bool partial)
+{
+       /*
+        * These on_stack() checks aren't strictly necessary: the unwind code
+        * has already validated the 'regs' pointer.  The checks are done for
+        * ordering reasons: if the registers are on the next stack, we don't
+        * want to print them out yet.  Otherwise they'll be shown as part of
+        * the wrong stack.  Later, when show_trace_log_lvl() switches to the
+        * next stack, this function will be called again with the same regs so
+        * they can be printed in the right context.
+        */
+       if (!partial && on_stack(info, regs, sizeof(*regs))) {
+               __show_regs(regs, 0);
+
+       } else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+                                      IRET_FRAME_SIZE)) {
+               /*
+                * When an interrupt or exception occurs in entry code, the
+                * full pt_regs might not have been saved yet.  In that case
+                * just print the iret frame.
+                */
+               show_iret_regs(regs);
+       }
+}
+
 void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl)
 {
@@ -57,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
        struct stack_info stack_info = {0};
        unsigned long visit_mask = 0;
        int graph_idx = 0;
+       bool partial;
 
        printk("%sCall Trace:\n", log_lvl);
 
        unwind_start(&state, task, regs, stack);
        stack = stack ? : get_stack_pointer(task, regs);
+       regs = unwind_get_entry_regs(&state, &partial);
 
        /*
         * Iterate through the stacks, starting with the current stack pointer.
@@ -71,31 +125,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
         * - task stack
         * - interrupt stack
         * - HW exception stacks (double fault, nmi, debug, mce)
+        * - entry stack
         *
-        * x86-32 can have up to three stacks:
+        * x86-32 can have up to four stacks:
         * - task stack
         * - softirq stack
         * - hardirq stack
+        * - entry stack
         */
-       for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+       for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
                const char *stack_name;
 
-               /*
-                * If we overflowed the task stack into a guard page, jump back
-                * to the bottom of the usable stack.
-                */
-               if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
-                       stack = task_stack_page(task);
-
-               if (get_stack_info(stack, task, &stack_info, &visit_mask))
-                       break;
+               if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
+                       /*
+                        * We weren't on a valid stack.  It's possible that
+                        * we overflowed a valid stack into a guard page.
+                        * See if the next page up is valid so that we can
+                        * generate some kind of backtrace if this happens.
+                        */
+                       stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
+                       if (get_stack_info(stack, task, &stack_info, &visit_mask))
+                               break;
+               }
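
This recovery works because kernel stacks overflow downward into a guard
page, so rounding the overflowed pointer up to the next page boundary
lands back on the stack that was overrun. A standalone sketch of the
retry, with a caller-supplied predicate standing in for get_stack_info()
(hypothetical helper, illustration only):

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_SIZE     4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Classify sp, retrying one page up in case sp sits in a guard
     * page just below a valid stack. */
    static bool classify_stack(uintptr_t sp, bool (*valid)(uintptr_t))
    {
            if (valid(sp))
                    return true;
            return valid(PAGE_ALIGN(sp));
    }
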
 
                stack_name = stack_type_name(stack_info.type);
                if (stack_name)
                        printk("%s <%s>\n", log_lvl, stack_name);
 
-               if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                       __show_regs(regs, 0);
+               if (regs)
+                       show_regs_if_on_stack(&stack_info, regs, partial);
 
                /*
                 * Scan the stack, printing any text addresses we find.  At the
@@ -119,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
                        /*
                         * Don't print regs->ip again if it was already printed
-                        * by __show_regs() below.
+                        * by show_regs_if_on_stack().
                         */
                        if (regs && stack == &regs->ip)
                                goto next;
@@ -154,9 +212,9 @@ next:
                        unwind_next_frame(&state);
 
                        /* if the frame has entry regs, print them */
-                       regs = unwind_get_entry_regs(&state);
-                       if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
-                               __show_regs(regs, 0);
+                       regs = unwind_get_entry_regs(&state, &partial);
+                       if (regs)
+                               show_regs_if_on_stack(&stack_info, regs, partial);
                }
 
                if (stack_name)
@@ -252,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
        unsigned long sp;
 #endif
        printk(KERN_DEFAULT
-              "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+              "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
               IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
               debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
-              IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
+              IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
+              IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+              (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
        if (notify_die(DIE_OOPS, str, regs, err,
                        current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
index daefae83a3aa86c59602b75bd3e6734c6e3b1030..04170f63e3a1d567caac3deea641e014b7e10823 100644 (file)
@@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_SOFTIRQ)
                return "SOFTIRQ";
 
+       if (type == STACK_TYPE_ENTRY)
+               return "ENTRY_TRAMPOLINE";
+
        return NULL;
 }
 
@@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (task != current)
                goto unknown;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        if (in_hardirq_stack(stack, info))
                goto recursion_check;
 
index 88ce2ffdb110303502ad33e64d357d8af5afd8c6..563e28d14f2ca157178d9de3a139d8370aaf89fe 100644 (file)
@@ -37,6 +37,15 @@ const char *stack_type_name(enum stack_type type)
        if (type == STACK_TYPE_IRQ)
                return "IRQ";
 
+       if (type == STACK_TYPE_ENTRY) {
+               /*
+                * On 64-bit, we have a generic entry stack that we
+                * use for all the kernel entry points, including
+                * SYSENTER.
+                */
+               return "ENTRY_TRAMPOLINE";
+       }
+
        if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
                return exception_stack_names[type - STACK_TYPE_EXCEPTION];
 
@@ -115,6 +124,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
        if (in_irq_stack(stack, info))
                goto recursion_check;
 
+       if (in_entry_stack(stack, info))
+               goto recursion_check;
+
        goto unknown;
 
 recursion_check:
index b6c6468e10bc96625c25c4ae8d19b734af81366b..4c8440de33559942a98245cf123a5ed10a30e406 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/segment.h>
 #include <asm/export.h>
 #include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CC_USING_FENTRY
 # define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
        movl    0x4(%ebp), %edx
        subl    $MCOUNT_INSN_SIZE, %eax
 
-       call    *ftrace_trace_function
+       movl    ftrace_trace_function, %ecx
+       CALL_NOSPEC %ecx
 
        popl    %edx
        popl    %ecx
@@ -241,5 +243,5 @@ return_to_handler:
        movl    %eax, %ecx
        popl    %edx
        popl    %eax
-       jmp     *%ecx
+       JMP_NOSPEC %ecx
 #endif
index c832291d948a6b4fd487a3e42fab8b8f7b9dd244..7cb8ba08beb997ef66724e2db5e66021c5ce32ee 100644 (file)
@@ -7,7 +7,7 @@
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
-
+#include <asm/nospec-branch.h>
 
        .code64
        .section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
         * ip and parent ip are used and the list function is called when
         * function tracing is enabled.
         */
-       call   *ftrace_trace_function
-
+       movq ftrace_trace_function, %r8
+       CALL_NOSPEC %r8
        restore_mcount_regs
 
        jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
        movq 8(%rsp), %rdx
        movq (%rsp), %rax
        addq $24, %rsp
-       jmp *%rdi
+       JMP_NOSPEC %rdi
 #endif
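
Both ftrace call sites now route their indirect branches through
CALL_NOSPEC/JMP_NOSPEC. Under CONFIG_RETPOLINE these expand to calls
through per-register thunks; a simplified, hedged sketch of the thunk
pattern (the real thunks live in arch/x86/lib/retpoline.S and are
patched in via alternatives):

    /* Simplified retpoline thunk for %rax: 'call' pushes a benign
     * return address; speculation of the 'ret' is penned in the
     * pause/lfence loop, while the architectural path overwrites the
     * return address with the real target and returns to it. */
    asm(
    "retpoline_rax_sketch:\n"
    "        call 1f\n"
    "2:      pause\n"             /* speculative execution parks here */
    "        lfence\n"
    "        jmp 2b\n"
    "1:      mov %rax, (%rsp)\n"  /* replace return address with target */
    "        ret\n"               /* architecturally jumps to *%rax */
    );
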
index 7dca675fe78db60c5d79bc450bbd14bfee35cfc2..04a625f0fcda322dab7c9d8459a19ee56b71c936 100644 (file)
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
        .balign PAGE_SIZE; \
 GLOBAL(name)
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL      512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+       .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL      0
+#endif
+
 /* Automate the creation of 1 to 1 mapping pmd entries */
 #define PMDS(START, PERM, COUNT)                       \
        i = 0 ;                                         \
@@ -350,13 +371,14 @@ GLOBAL(name)
        .endr
 
        __INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
        .fill   511,8,0
 #ifdef CONFIG_X86_5LEVEL
        .quad   level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #else
        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
+       .fill   PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(early_dynamic_pgts)
        .fill   512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
        .data
 
 #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
        .org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
        .quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
        .org    init_top_pgt + PGD_START_KERNEL*8, 0
        /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
        .quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+       .fill   PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_ident_pgt)
        .quad   level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
         */
        PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
        .fill   512,8,0
+       .fill   PTI_USER_PGD_FILL,8,0
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
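
Because each PGD allocation is now 8k long with the user half exactly
one page above the kernel half, converting between the two views is
plain address arithmetic. A hedged sketch of the helper this layout
enables (the kernel_to_user_pgdp() used by the LDT code later in this
diff; exact definition assumed):

    /* Assumed shape: with an 8k-aligned, 8k-long PGD, the user-mode
     * copy of an entry lives PAGE_SIZE above the kernel-mode copy. */
    static inline pgd_t *kernel_to_user_pgdp_sketch(pgd_t *pgdp)
    {
            return (pgd_t *)((unsigned long)pgdp + PAGE_SIZE);
    }
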
index 3feb648781c470a7a49ee26749712ba7da891fe9..2f723301eb58fc5ad0d6796b342446ae2ee0c9e6 100644 (file)
@@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(cpu_tss, get_cpu());
+       tss = &per_cpu(cpu_tss_rw, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 49cfd9fe7589fa5ef2bef5d4a5d6431b7007836f..68e1867cca8045d0ed728ffc6b75a866c25484ed 100644 (file)
@@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
        /* high bit used in ret_from_ code  */
        unsigned vector = ~regs->orig_ax;
 
-       /*
-        * NB: Unlike exception entries, IRQ entries do not reliably
-        * handle context tracking in the low-level entry code.  This is
-        * because syscall entries execute briefly with IRQs on before
-        * updating context tracking state, so we can take an IRQ from
-        * kernel mode with CONTEXT_USER.  The low-level entry code only
-        * updates the context if we came from user mode, so we won't
-        * switch to CONTEXT_KERNEL.  We'll fix that once the syscall
-        * code is cleaned up enough that we can cleanly defer enabling
-        * IRQs.
-        */
-
        entering_irq();
 
        /* entering_irq() tells RCU that we're not quiescent.  Check it. */
index a83b3346a0e104833793687aea68f7f216cf0374..c1bdbd3d3232cb83d07bfa4ce604ae0b620c796a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/mm.h>
 
 #include <asm/apic.h>
+#include <asm/nospec-branch.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
 static void call_on_stack(void *func, void *stack)
 {
        asm volatile("xchgl     %%ebx,%%esp     \n"
-                    "call      *%%edi          \n"
+                    CALL_NOSPEC
                     "movl      %%ebx,%%esp     \n"
                     : "=b" (stack)
                     : "0" (stack),
-                      "D"(func)
+                      [thunk_target] "D"(func)
                     : "memory", "cc", "edx", "ecx", "eax");
 }
 
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
                call_on_stack(print_stack_overflow, isp);
 
        asm volatile("xchgl     %%ebx,%%esp     \n"
-                    "call      *%%edi          \n"
+                    CALL_NOSPEC
                     "movl      %%ebx,%%esp     \n"
                     : "=a" (arg1), "=b" (isp)
                     :  "0" (desc),   "1" (isp),
-                       "D" (desc->handle_irq)
+                       [thunk_target] "D" (desc->handle_irq)
                     : "memory", "cc", "ecx");
        return 1;
 }
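
The named [thunk_target] operand is what lets CALL_NOSPEC select the
thunk for whichever register holds the target. A hedged sketch of the
call-site shape (simplified; the real macro is an alternative that
falls back to a plain indirect call when retpolines are off):

    #include <asm/nospec-branch.h>

    /* With retpolines enabled, CALL_NOSPEC expands to roughly
     * "call __x86_indirect_thunk_%V[thunk_target]", so the target must
     * be a named register operand rather than a bare "call *%0". */
    static void indirect_call_sketch(void (*func)(void))
    {
            asm volatile(CALL_NOSPEC
                         : : [thunk_target] "r" (func) : "memory");
    }
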
index 020efbf5786b35d343a8632cd14ac4f800465d9b..d86e344f5b3debfed504b72a7c0f83f36fe16387 100644 (file)
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        if (regs->sp >= estack_top && regs->sp <= estack_bottom)
                return;
 
-       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
+       WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
                current->comm, curbase, regs->sp,
                irq_stack_top, irq_stack_bottom,
-               estack_top, estack_bottom);
+               estack_top, estack_bottom, (void *)regs->ip);
 
        if (sysctl_panic_on_stackoverflow)
                panic("low stack detected by irq handler - check messages\n");
index 1c1eae9613406b14c3154065e1fd036f985a384c..26d713ecad34a847e650638d02442eda04379f5e 100644 (file)
@@ -5,6 +5,11 @@
  * Copyright (C) 2002 Andi Kleen
  *
  * This handles calls from both 32bit and 64bit mode.
+ *
+ * Lock order:
+ *     context.ldt_usr_sem
+ *       mmap_sem
+ *         context.lock
  */
 
 #include <linux/errno.h>
@@ -19,6 +24,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/ldt.h>
+#include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
@@ -42,17 +48,15 @@ static void refresh_ldt_segments(void)
 #endif
 }
 
-/* context.lock is held for us, so we don't need any locking. */
+/* context.lock is held by the task which issued the smp function call */
 static void flush_ldt(void *__mm)
 {
        struct mm_struct *mm = __mm;
-       mm_context_t *pc;
 
        if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
                return;
 
-       pc = &mm->context;
-       set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+       load_mm_ldt(mm);
 
        refresh_ldt_segments();
 }
@@ -89,25 +93,143 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
                return NULL;
        }
 
+       /* The new LDT isn't aliased for PTI yet. */
+       new_ldt->slot = -1;
+
        new_ldt->nr_entries = num_entries;
        return new_ldt;
 }
 
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function.  Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       bool is_vmalloc, had_top_level_entry;
+       unsigned long va;
+       spinlock_t *ptl;
+       pgd_t *pgd;
+       int i;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return 0;
+
+       /*
+        * Any given ldt_struct should have map_ldt_struct() called at most
+        * once.
+        */
+       WARN_ON(ldt->slot != -1);
+
+       /*
+        * Did we already have the top level entry allocated?  We can't
+        * use pgd_none() for this because it doesn't do anything on
+        * 4-level page table kernels.
+        */
+       pgd = pgd_offset(mm, LDT_BASE_ADDR);
+       had_top_level_entry = (pgd->pgd != 0);
+
+       is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+       for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+               unsigned long offset = i << PAGE_SHIFT;
+               const void *src = (char *)ldt->entries + offset;
+               unsigned long pfn;
+               pte_t pte, *ptep;
+
+               va = (unsigned long)ldt_slot_va(slot) + offset;
+               pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+                       page_to_pfn(virt_to_page(src));
+               /*
+                * Treat the PTI LDT range as a *userspace* range.
+                * get_locked_pte() will allocate all needed pagetables
+                * and account for them in this mm.
+                */
+               ptep = get_locked_pte(mm, va, &ptl);
+               if (!ptep)
+                       return -ENOMEM;
+               /*
+                * Map it RO so this easily found address is not a primary
+                * target for attack via some kernel interface which misses
+                * a permission check.
+                */
+               pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+               set_pte_at(mm, va, ptep, pte);
+               pte_unmap_unlock(ptep, ptl);
+       }
+
+       if (mm->context.ldt) {
+               /*
+                * We already had an LDT.  The top-level entry should already
+                * have been allocated and synchronized with the usermode
+                * tables.
+                */
+               WARN_ON(!had_top_level_entry);
+               if (static_cpu_has(X86_FEATURE_PTI))
+                       WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+       } else {
+               /*
+                * This is the first time we're mapping an LDT for this process.
+                * Sync the pgd to the usermode tables.
+                */
+               WARN_ON(had_top_level_entry);
+               if (static_cpu_has(X86_FEATURE_PTI)) {
+                       WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+                       set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+               }
+       }
+
+       va = (unsigned long)ldt_slot_va(slot);
+       flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+       ldt->slot = slot;
+#endif
+       return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       struct mmu_gather tlb;
+       unsigned long start = LDT_BASE_ADDR;
+       unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       tlb_gather_mmu(&tlb, mm, start, end);
+       free_pgd_range(&tlb, start, end, start, end);
+       tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
        paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 }
 
-/* context.lock is held */
-static void install_ldt(struct mm_struct *current_mm,
-                       struct ldt_struct *ldt)
+static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 {
+       mutex_lock(&mm->context.lock);
+
        /* Synchronizes with READ_ONCE in load_mm_ldt. */
-       smp_store_release(&current_mm->context.ldt, ldt);
+       smp_store_release(&mm->context.ldt, ldt);
 
-       /* Activate the LDT for all CPUs using current_mm. */
-       on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
+       /* Activate the LDT for all CPUs using current's mm. */
+       on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
+
+       mutex_unlock(&mm->context.lock);
 }
 
 static void free_ldt_struct(struct ldt_struct *ldt)
@@ -124,27 +246,20 @@ static void free_ldt_struct(struct ldt_struct *ldt)
 }
 
 /*
- * we do not have to muck with descriptors here, that is
- * done in switch_mm() as needed.
+ * Called on fork from arch_dup_mmap(). Just copy the current LDT state;
+ * the new task is not running, so nothing can be installed.
  */
-int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
+int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 {
        struct ldt_struct *new_ldt;
-       struct mm_struct *old_mm;
        int retval = 0;
 
-       mutex_init(&mm->context.lock);
-       old_mm = current->mm;
-       if (!old_mm) {
-               mm->context.ldt = NULL;
+       if (!old_mm)
                return 0;
-       }
 
        mutex_lock(&old_mm->context.lock);
-       if (!old_mm->context.ldt) {
-               mm->context.ldt = NULL;
+       if (!old_mm->context.ldt)
                goto out_unlock;
-       }
 
        new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
        if (!new_ldt) {
@@ -156,6 +271,12 @@ int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
               new_ldt->nr_entries * LDT_ENTRY_SIZE);
        finalize_ldt_struct(new_ldt);
 
+       retval = map_ldt_struct(mm, new_ldt, 0);
+       if (retval) {
+               free_ldt_pgtables(mm);
+               free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
        mm->context.ldt = new_ldt;
 
 out_unlock:
@@ -174,13 +295,18 @@ void destroy_context_ldt(struct mm_struct *mm)
        mm->context.ldt = NULL;
 }
 
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+       free_ldt_pgtables(mm);
+}
+
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
        struct mm_struct *mm = current->mm;
        unsigned long entries_size;
        int retval;
 
-       mutex_lock(&mm->context.lock);
+       down_read(&mm->context.ldt_usr_sem);
 
        if (!mm->context.ldt) {
                retval = 0;
@@ -209,7 +335,7 @@ static int read_ldt(void __user *ptr, unsigned long bytecount)
        retval = bytecount;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_read(&mm->context.ldt_usr_sem);
        return retval;
 }
 
@@ -269,7 +395,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
                        ldt.avl = 0;
        }
 
-       mutex_lock(&mm->context.lock);
+       if (down_write_killable(&mm->context.ldt_usr_sem))
+               return -EINTR;
 
        old_ldt       = mm->context.ldt;
        old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
@@ -286,12 +413,31 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
        new_ldt->entries[ldt_info.entry_number] = ldt;
        finalize_ldt_struct(new_ldt);
 
+       /*
+        * If we are using PTI, map the new LDT into the userspace pagetables.
+        * If there is already an LDT, use the other slot so that other CPUs
+        * will continue to use the old LDT until install_ldt() switches
+        * them over to the new LDT.
+        */
+       error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+       if (error) {
+               /*
+                * This can only fail for the first LDT setup. If an LDT is
+                * already installed then the PTE page is already
+                * populated. Mop up a half-populated page table.
+                */
+               if (!WARN_ON_ONCE(old_ldt))
+                       free_ldt_pgtables(mm);
+               free_ldt_struct(new_ldt);
+               goto out_unlock;
+       }
+
        install_ldt(mm, new_ldt);
        free_ldt_struct(old_ldt);
        error = 0;
 
 out_unlock:
-       mutex_unlock(&mm->context.lock);
+       up_write(&mm->context.ldt_usr_sem);
 out:
        return error;
 }
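
The slot arithmetic in write_ldt() (old_ldt ? !old_ldt->slot : 0)
alternates between two fixed virtual aliases, so CPUs still running on
the old LDT keep a valid mapping until install_ldt() flips them over. A
hedged sketch of how a slot number becomes an address (LDT_BASE_ADDR,
LDT_ENTRIES and LDT_ENTRY_SIZE assumed to match the series'
mmu_context.h):

    /* Each of the two LDT slots is a fixed-stride alias inside the
     * PTI-visible LDT range; the new LDT is mapped into whichever slot
     * the old one is NOT using. */
    #define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)

    static inline void *ldt_slot_va_sketch(int slot)
    {
            return (void *)(LDT_BASE_ADDR +
                            (unsigned long)slot * LDT_SLOT_STRIDE);
    }
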
index 00bc751c861ce8fa42c1b93d39d029694f4e328d..edfede76868870e4cf86fed553377fd6bd0cac06 100644 (file)
@@ -48,8 +48,6 @@ static void load_segments(void)
                "\tmovl $"STR(__KERNEL_DS)",%%eax\n"
                "\tmovl %%eax,%%ds\n"
                "\tmovl %%eax,%%es\n"
-               "\tmovl %%eax,%%fs\n"
-               "\tmovl %%eax,%%gs\n"
                "\tmovl %%eax,%%ss\n"
                : : : "eax", "memory");
 #undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
         * The gdt & idt are now invalid.
         * If you want to load them you must set up your own idt & gdt.
         */
-       set_gdt(phys_to_virt(0), 0);
        idt_invalidate(phys_to_virt(0));
+       set_gdt(phys_to_virt(0), 0);
 
        /* now call it */
        image->start = relocate_kernel_ptr((unsigned long)image->head,
index ac0be8283325edfdc2752f862b4c0cef208a931c..9edadabf04f66c657f8a29bb56fe994b2559d5cf 100644 (file)
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
 DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
 DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
 DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
 
 DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                PATCH_SITE(pv_mmu_ops, read_cr2);
                PATCH_SITE(pv_mmu_ops, read_cr3);
                PATCH_SITE(pv_mmu_ops, write_cr3);
-               PATCH_SITE(pv_mmu_ops, flush_tlb_single);
                PATCH_SITE(pv_cpu_ops, wbinvd);
 #if defined(CONFIG_PARAVIRT_SPINLOCKS)
                case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
index bb988a24db927d758f9120d45f90d1c160628790..832a6acd730fad70e1426052d502f6438b30e38e 100644 (file)
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
        .x86_tss = {
                /*
                 * .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
                 * Poison it.
                 */
                .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
+
+#ifdef CONFIG_X86_64
+               /*
+                * .sp1 is cpu_current_top_of_stack.  The init task never
+                * runs user code, but cpu_current_top_of_stack should still
+                * be well defined before the first context switch.
+                */
+               .sp1 = TOP_OF_INIT_STACK,
+#endif
+
 #ifdef CONFIG_X86_32
                .ss0 = __KERNEL_DS,
                .ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
          */
        .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
 #endif
-#ifdef CONFIG_X86_32
-       .SYSENTER_stack_canary  = STACK_END_MAGIC,
-#endif
 };
-EXPORT_PER_CPU_SYMBOL(cpu_tss);
+EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
 
 DEFINE_PER_CPU(bool, __tss_limit_invalid);
 EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
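
A hedged note on how .sp1 is consumed in this series (assumed from the
initializer's comment above): on x86-64 the per-cpu "current top of
stack" stops being a standalone variable and becomes an alias for the
TSS field, roughly:

    /* Assumed alias; initializing .sp1 keeps it well defined before
     * the first context switch ever stores to it. */
    #ifdef CONFIG_X86_64
    #define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
    #endif
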
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
        struct fpu *fpu = &t->fpu;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
index 45bf0c5f93e15103060d67d5245756ab72ce8fe5..5224c609918416337b97440eb2d515d8052463ae 100644 (file)
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
index eeeb34f85c250e8c01188b6d32cf5a62bd1af8a0..c754662320163107ca3a254362ce0e404a8d3c11 100644 (file)
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;
 
-       printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
-       printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
-               regs->sp, regs->flags);
+       show_iret_regs(regs);
+
        if (regs->orig_ax != -1)
                pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
        else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
        printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
               regs->r13, regs->r14, regs->r15);
 
+       if (!all)
+               return;
+
        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
-       if (!all)
-               return;
-
        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct fpu *prev_fpu = &prev->fpu;
        struct fpu *next_fpu = &next->fpu;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
        WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
                     this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * Switch the PDA and FPU contexts.
         */
        this_cpu_write(current_task, next_p);
+       this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
        /* Reload sp0. */
        update_sp0(next_p);
index 8af2e8d0c0a1d2d0290ff2026afeab056cdc59b6..145810b0edf6738b57a0e6560c53392ae54a82e7 100644 (file)
@@ -906,9 +906,6 @@ void __init setup_arch(char **cmdline_p)
                set_bit(EFI_BOOT, &efi.flags);
                set_bit(EFI_64BIT, &efi.flags);
        }
-
-       if (efi_enabled(EFI_BOOT))
-               efi_memblock_x86_reserve_range();
 #endif
 
        x86_init.oem.arch_setup();
@@ -962,6 +959,8 @@ void __init setup_arch(char **cmdline_p)
 
        parse_early_param();
 
+       if (efi_enabled(EFI_BOOT))
+               efi_memblock_x86_reserve_range();
 #ifdef CONFIG_MEMORY_HOTPLUG
        /*
         * Memory used by the kernel cannot be hot-removed because Linux
index 35cb20994e32d2bf05f0b1510ccc26cc7e7590a5..ed556d50d7ed6d71f03d202ef84a6b6b54e95a02 100644 (file)
@@ -126,25 +126,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
        spin_lock_irqsave(&rtc_lock, flags);
        CMOS_WRITE(0xa, 0xf);
        spin_unlock_irqrestore(&rtc_lock, flags);
-       local_flush_tlb();
-       pr_debug("1.\n");
        *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
                                                        start_eip >> 4;
-       pr_debug("2.\n");
        *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
                                                        start_eip & 0xf;
-       pr_debug("3.\n");
 }
 
 static inline void smpboot_restore_warm_reset_vector(void)
 {
        unsigned long flags;
 
-       /*
-        * Install writable page 0 entry to set BIOS data area.
-        */
-       local_flush_tlb();
-
        /*
         * Paranoid:  Set warm reset code and vector here back
         * to default values.
@@ -932,12 +923,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
        initial_code = (unsigned long)start_secondary;
        initial_stack  = idle->thread.sp;
 
-       /*
-        * Enable the espfix hack for this CPU
-       */
-#ifdef CONFIG_X86_ESPFIX64
+       /* Enable the espfix hack for this CPU */
        init_espfix_ap(cpu);
-#endif
 
        /* So we see what's up */
        announce_cpu(cpu, apicid);
index 77835bc021c766744dd1976b4429141e3fb645fa..093f2ea5dd56b613d03ccaeeb87fd50900f64ba1 100644 (file)
@@ -102,7 +102,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
        for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
             unwind_next_frame(&state)) {
 
-               regs = unwind_get_entry_regs(&state);
+               regs = unwind_get_entry_regs(&state, NULL);
                if (regs) {
                        /*
                         * Kernel mode registers on the stack indicate an
@@ -164,8 +164,12 @@ int save_stack_trace_tsk_reliable(struct task_struct *tsk,
 {
        int ret;
 
+       /*
+        * If the task doesn't have a stack (e.g., a zombie), the stack is
+        * "reliably" empty.
+        */
        if (!try_get_task_stack(tsk))
-               return -EINVAL;
+               return 0;
 
        ret = __save_stack_trace_reliable(trace, tsk);
 
index a4eb27918cebf99dc2415f7eec49c4838d6f9f41..a2486f4440734756d49a96313c0e2f9a174a87db 100644 (file)
@@ -138,6 +138,17 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
                return -1;
        set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
        pte_unmap(pte);
+
+       /*
+        * PTI poisons low addresses in the kernel page tables in the
+        * name of making them unusable for userspace.  To execute
+        * code at such a low address, the poison must be cleared.
+        *
+        * Note: 'pgd' actually gets set in p4d_alloc() _or_
+        * pud_alloc() depending on 4/5-level paging.
+        */
+       pgd->pgd &= ~_PAGE_NX;
+
        return 0;
 }
 
index 9a9c9b076955dd493c7de80b8d814a27dd0fceb7..a5b802a1221272402b344d75ccf94bcff4b69b38 100644 (file)
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
        cpu = get_cpu();
 
        while (n-- > 0) {
-               if (LDT_empty(info) || LDT_zero(info)) {
+               if (LDT_empty(info) || LDT_zero(info))
                        memset(desc, 0, sizeof(*desc));
-               } else {
+               else
                        fill_ldt(desc, info);
-
-                       /*
-                        * Always set the accessed bit so that the CPU
-                        * doesn't try to write to the (read-only) GDT.
-                        */
-                       desc->type |= 1;
-               }
                ++info;
                ++desc;
        }
index 989514c94a55d8fa93a07192edd199be1a607bf8..446c9ef8cfc32b68d77d4429128b3c794b3fc069 100644 (file)
@@ -51,6 +51,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
@@ -348,23 +349,42 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 
        /*
         * If IRET takes a non-IST fault on the espfix64 stack, then we
-        * end up promoting it to a doublefault.  In that case, modify
-        * the stack to make it look like we just entered the #GP
-        * handler from user space, similar to bad_iret.
+        * end up promoting it to a doublefault.  In that case, take
+        * advantage of the fact that we're not using the normal (TSS.sp0)
+        * stack right now.  We can write a fake #GP(0) frame at TSS.sp0
+        * and then modify our own IRET frame so that, when we return,
+        * we land directly at the #GP(0) vector with the stack already
+        * set up according to its expectations.
+        *
+        * The net result is that our #GP handler will think that we
+        * entered from usermode with the bad user context.
         *
         * No need for ist_enter here because we don't use RCU.
         */
-       if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+       if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
                regs->cs == __KERNEL_CS &&
                regs->ip == (unsigned long)native_irq_return_iret)
        {
-               struct pt_regs *normal_regs = task_pt_regs(current);
+               struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
-               /* Fake a #GP(0) from userspace. */
-               memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
-               normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+               /*
+                * regs->sp points to the failing IRET frame on the
+                * ESPFIX64 stack.  Copy it to the entry stack.  This fills
+                * in gpregs->ss through gpregs->ip.
+                */
+               memmove(&gpregs->ip, (void *)regs->sp, 5*8);
+               gpregs->orig_ax = 0;  /* Missing (lost) #GP error code */
+
+               /*
+                * Adjust our frame so that we return straight to the #GP
+                * vector with the expected RSP value.  This is safe because
+                * we won't enable interrupts or schedule before we invoke
+                * general_protection, so nothing will clobber the stack
+                * frame we just set up.
+                */
                regs->ip = (unsigned long)general_protection;
-               regs->sp = (unsigned long)&normal_regs->orig_ax;
+               regs->sp = (unsigned long)&gpregs->orig_ax;
 
                return;
        }
@@ -389,7 +409,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
         *
         *   Processors update CR2 whenever a page fault is detected. If a
         *   second page fault occurs while an earlier page fault is being
-        *   delivered, the faulting linear address of the second fault will
+        *   delivered, the faulting linear address of the second fault will
         *   overwrite the contents of CR2 (replacing the previous
         *   address). These updates to CR2 occur even if the page fault
         *   results in a double fault or occurs during the delivery of a
@@ -605,14 +625,15 @@ NOKPROBE_SYMBOL(do_int3);
 
 #ifdef CONFIG_X86_64
 /*
- * Help handler running on IST stack to switch off the IST stack if the
- * interrupted code was in user mode. The actual stack switch is done in
- * entry_64.S
+ * Help handler running on a per-cpu (IST or entry trampoline) stack
+ * to switch to the normal thread stack if the interrupted code was in
+ * user mode. The actual stack switch is done in entry_64.S
  */
 asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-       struct pt_regs *regs = task_pt_regs(current);
-       *regs = *eregs;
+       struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+       if (regs != eregs)
+               *regs = *eregs;
        return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
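
The "(struct pt_regs *)top - 1" idiom used here and in fixup_bad_iret()
below carves a register frame out of the top of a stack: subtracting 1
from a struct pointer steps back by sizeof(struct pt_regs) bytes. A
minimal sketch:

    /* Given the address one byte past the top of a stack, return where
     * a full pt_regs frame would sit: the highest sizeof(struct
     * pt_regs) bytes of that stack. */
    static inline struct pt_regs *frame_at_top(unsigned long top_of_stack)
    {
            return (struct pt_regs *)top_of_stack - 1;
    }
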
@@ -628,13 +649,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /*
         * This is called from entry_64.S early in handling a fault
         * caused by a bad iret to user mode.  To handle the fault
-        * correctly, we want move our stack frame to task_pt_regs
-        * and we want to pretend that the exception came from the
-        * iret target.
+        * correctly, we want to move our stack frame to where it would
+        * be had we entered directly on the entry stack (rather than
+        * just below the IRET frame) and we want to pretend that the
+        * exception came from the IRET target.
         */
        struct bad_iret_stack *new_stack =
-               container_of(task_pt_regs(current),
-                            struct bad_iret_stack, regs);
+               (struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
 
        /* Copy the IRET target to the new stack. */
        memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +816,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        debug_stack_usage_dec();
 
 exit:
-#if defined(CONFIG_X86_32)
-       /*
-        * This is the most likely code path that involves non-trivial use
-        * of the SYSENTER stack.  Check that we haven't overrun it.
-        */
-       WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
-            "Overran or corrupted SYSENTER stack\n");
-#endif
        ist_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +942,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 
 void __init trap_init(void)
 {
+       /* Init cpu_entry_area before IST entries are set up */
+       setup_cpu_entry_areas();
+
        idt_setup_traps();
 
        /*
@@ -936,8 +952,9 @@ void __init trap_init(void)
         * "sidt" instruction will not leak the location of the kernel, and
         * to defend the IDT against arbitrary memory write vulnerabilities.
         * It will be reloaded in cpu_init() */
-       __set_fixmap(FIX_RO_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO);
-       idt_descr.address = fix_to_virt(FIX_RO_IDT);
+       cea_set_pte(CPU_ENTRY_AREA_RO_IDT_VADDR, __pa_symbol(idt_table),
+                   PAGE_KERNEL_RO);
+       idt_descr.address = CPU_ENTRY_AREA_RO_IDT;
 
        /*
         * Should be a barrier for any external CPU state:
index a3f973b2c97a03b121fe0173dbdc9298216721e6..be86a865087a6b9dc8e04031dbf2e2fbeeda1ed5 100644 (file)
@@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
        return NULL;
 }
 
-static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
+static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
                            size_t len)
 {
        struct stack_info *info = &state->stack_info;
+       void *addr = (void *)_addr;
 
-       /*
-        * If the address isn't on the current stack, switch to the next one.
-        *
-        * We may have to traverse multiple stacks to deal with the possibility
-        * that info->next_sp could point to an empty stack and the address
-        * could be on a subsequent stack.
-        */
-       while (!on_stack(info, (void *)addr, len))
-               if (get_stack_info(info->next_sp, state->task, info,
-                                  &state->stack_mask))
-                       return false;
+       if (!on_stack(info, addr, len) &&
+           (get_stack_info(addr, state->task, info, &state->stack_mask)))
+               return false;
 
        return true;
 }
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
        return true;
 }
 
-#define REGS_SIZE (sizeof(struct pt_regs))
-#define SP_OFFSET (offsetof(struct pt_regs, sp))
-#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
-#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
-
 static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
-                            unsigned long *ip, unsigned long *sp, bool full)
+                            unsigned long *ip, unsigned long *sp)
 {
-       size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
-       size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
-       struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
-
-       if (IS_ENABLED(CONFIG_X86_64)) {
-               if (!stack_access_ok(state, addr, regs_size))
-                       return false;
+       struct pt_regs *regs = (struct pt_regs *)addr;
 
-               *ip = regs->ip;
-               *sp = regs->sp;
+       /* x86-32 support will be more complicated due to the &regs->sp hack */
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
 
-               return true;
-       }
-
-       if (!stack_access_ok(state, addr, sp_offset))
+       if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
                return false;
 
        *ip = regs->ip;
+       *sp = regs->sp;
+       return true;
+}
 
-       if (user_mode(regs)) {
-               if (!stack_access_ok(state, addr + sp_offset,
-                                    REGS_SIZE - SP_OFFSET))
-                       return false;
+static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
+                                 unsigned long *ip, unsigned long *sp)
+{
+       struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
 
-               *sp = regs->sp;
-       } else
-               *sp = (unsigned long)&regs->sp;
+       if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
+               return false;
 
+       *ip = regs->ip;
+       *sp = regs->sp;
        return true;
 }
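
IRET_FRAME_OFFSET and IRET_FRAME_SIZE exploit the fact that the
hardware iret frame (ip, cs, flags, sp, ss) forms the tail of struct
pt_regs. A hedged sketch of the definitions this code assumes:

    #include <stddef.h>

    /* Assumed definitions: the iret frame starts at pt_regs.ip, so
     * backing an iret-frame address up by IRET_FRAME_OFFSET yields a
     * (partial) pt_regs pointer, as deref_stack_iret_regs() does. */
    #define IRET_FRAME_OFFSET offsetof(struct pt_regs, ip)
    #define IRET_FRAME_SIZE   (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
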
 
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
        unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
        enum stack_type prev_type = state->stack_info.type;
        struct orc_entry *orc;
-       struct pt_regs *ptregs;
        bool indirect = false;
 
        if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
+               if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_TYPE_REGS_IRET:
-               if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
+               if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
                        orc_warn("can't dereference iret registers at %p for ip %pB\n",
                                 (void *)sp, (void *)orig_ip);
                        goto done;
                }
 
-               ptregs = container_of((void *)sp, struct pt_regs, ip);
-               if ((unsigned long)ptregs >= prev_sp &&
-                   on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
-                       state->regs = ptregs;
-                       state->full_regs = false;
-               } else
-                       state->regs = NULL;
-
+               state->regs = (void *)sp - IRET_FRAME_OFFSET;
+               state->full_regs = false;
                state->signal = true;
                break;
 
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        }
 
        if (get_stack_info((unsigned long *)state->sp, state->task,
-                          &state->stack_info, &state->stack_mask))
-               return;
+                          &state->stack_info, &state->stack_mask)) {
+               /*
+                * We weren't on a valid stack.  It's possible that
+                * we overflowed a valid stack into a guard page.
+                * See if the next page up is valid so that we can
+                * generate some kind of backtrace if this happens.
+                */
+               void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
+               if (get_stack_info(next_page, state->task, &state->stack_info,
+                                  &state->stack_mask))
+                       return;
+       }
 
        /*
         * The caller can provide the address of the first frame directly
index a4009fb9be8725ce7bda96cd5e8160e524903266..1e413a9326aaa152a47d51fa4c1d1ea03cbb4d94 100644 (file)
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
                . = ALIGN(HPAGE_SIZE);                          \
                __end_rodata_hpage_align = .;
 
+#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END   . = ALIGN(PMD_SIZE);
+
 #else
 
 #define X64_ALIGN_RODATA_BEGIN
 #define X64_ALIGN_RODATA_END
 
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
 #endif
 
 PHDRS {
@@ -102,11 +108,22 @@ SECTIONS
                CPUIDLE_TEXT
                LOCK_TEXT
                KPROBES_TEXT
+               ALIGN_ENTRY_TEXT_BEGIN
                ENTRY_TEXT
                IRQENTRY_TEXT
+               ALIGN_ENTRY_TEXT_END
                SOFTIRQENTRY_TEXT
                *(.fixup)
                *(.gnu.warning)
+
+#ifdef CONFIG_X86_64
+               . = ALIGN(PAGE_SIZE);
+               _entry_trampoline = .;
+               *(.entry_trampoline)
+               . = ALIGN(PAGE_SIZE);
+               ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
+#endif
+
                /* End of text section */
                _etext = .;
        } :text = 0x9090
index abe74f779f9d793e9a6c2f19417f23b5aa7ce484..b514b2b2845a334d4b53f28ed0b73c96f12d0e6a 100644 (file)
@@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
 }
 
 static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-                                    u64 cr0, u64 cr4)
+                                   u64 cr0, u64 cr3, u64 cr4)
 {
        int bad;
+       u64 pcid;
+
+       /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+       pcid = 0;
+       if (cr4 & X86_CR4_PCIDE) {
+               pcid = cr3 & 0xfff;
+               cr3 &= ~0xfff;
+       }
+
+       bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+       if (bad)
+               return X86EMUL_UNHANDLEABLE;
 
        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
@@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
                bad = ctxt->ops->set_cr(ctxt, 4, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
+               if (pcid) {
+                       bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+                       if (bad)
+                               return X86EMUL_UNHANDLEABLE;
+               }
+
        }
 
        return X86EMUL_CONTINUE;
@@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
        struct desc_struct desc;
        struct desc_ptr dt;
        u16 selector;
-       u32 val, cr0, cr4;
+       u32 val, cr0, cr3, cr4;
        int i;
 
        cr0 =                      GET_SMSTATE(u32, smbase, 0x7ffc);
-       ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+       cr3 =                      GET_SMSTATE(u32, smbase, 0x7ff8);
        ctxt->eflags =             GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
        ctxt->_eip =               GET_SMSTATE(u32, smbase, 0x7ff0);
 
@@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
 
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
 
-       return rsm_enter_protected_mode(ctxt, cr0, cr4);
+       return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
 }
 
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
 {
        struct desc_struct desc;
        struct desc_ptr dt;
-       u64 val, cr0, cr4;
+       u64 val, cr0, cr3, cr4;
        u32 base3;
        u16 selector;
        int i, r;
@@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
 
        cr0 =                       GET_SMSTATE(u64, smbase, 0x7f58);
-       ctxt->ops->set_cr(ctxt, 3,  GET_SMSTATE(u64, smbase, 0x7f50));
+       cr3 =                       GET_SMSTATE(u64, smbase, 0x7f50);
        cr4 =                       GET_SMSTATE(u64, smbase, 0x7f48);
        ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
        val =                       GET_SMSTATE(u64, smbase, 0x7ed0);
@@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
        dt.address =                GET_SMSTATE(u64, smbase, 0x7e68);
        ctxt->ops->set_gdt(ctxt, &dt);
 
-       r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+       r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
        if (r != X86EMUL_CONTINUE)
                return r;
 
index e5e66e5c664057bb5cc5ad2660008ccbf19b69e5..2b8eb4da4d0823bdcdf7bde1feb44132d0113cde 100644 (file)
@@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if(make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, 0, 0,
                                vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
                        spin_lock(&vcpu->kvm->mmu_lock);
                        if (make_mmu_pages_available(vcpu) < 0) {
                                spin_unlock(&vcpu->kvm->mmu_lock);
-                               return 1;
+                               return -ENOSPC;
                        }
                        sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                        i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
-                       return 1;
+                       return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                      0, ACC_ALL);
@@ -3781,7 +3781,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 {
        if (unlikely(!lapic_in_kernel(vcpu) ||
-                    kvm_event_needs_reinjection(vcpu)))
+                    kvm_event_needs_reinjection(vcpu) ||
+                    vcpu->arch.exception.pending))
                return false;
 
        if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
@@ -5465,30 +5466,34 @@ static void mmu_destroy_caches(void)
 
 int kvm_mmu_module_init(void)
 {
+       int ret = -ENOMEM;
+
        kvm_mmu_clear_all_pte_masks();
 
        pte_list_desc_cache = kmem_cache_create("pte_list_desc",
                                            sizeof(struct pte_list_desc),
                                            0, SLAB_ACCOUNT, NULL);
        if (!pte_list_desc_cache)
-               goto nomem;
+               goto out;
 
        mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
                                                  sizeof(struct kvm_mmu_page),
                                                  0, SLAB_ACCOUNT, NULL);
        if (!mmu_page_header_cache)
-               goto nomem;
+               goto out;
 
        if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL))
-               goto nomem;
+               goto out;
 
-       register_shrinker(&mmu_shrinker);
+       ret = register_shrinker(&mmu_shrinker);
+       if (ret)
+               goto out;
 
        return 0;
 
-nomem:
+out:
        mmu_destroy_caches();
-       return -ENOMEM;
+       return ret;
 }
 
 /*
index eb714f1cdf7eee4ca9036005c3ab72ef9228ae9b..f40d0da1f1d321e4705f24ee85b80ad15233e438 100644 (file)
@@ -45,6 +45,7 @@
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -361,7 +362,6 @@ static void recalc_intercepts(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *c, *h;
        struct nested_state *g;
-       u32 h_intercept_exceptions;
 
        mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 
@@ -372,14 +372,9 @@ static void recalc_intercepts(struct vcpu_svm *svm)
        h = &svm->nested.hsave->control;
        g = &svm->nested;
 
-       /* No need to intercept #UD if L1 doesn't intercept it */
-       h_intercept_exceptions =
-               h->intercept_exceptions & ~(1U << UD_VECTOR);
-
        c->intercept_cr = h->intercept_cr | g->intercept_cr;
        c->intercept_dr = h->intercept_dr | g->intercept_dr;
-       c->intercept_exceptions =
-               h_intercept_exceptions | g->intercept_exceptions;
+       c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
        c->intercept = h->intercept | g->intercept;
 }
 
@@ -2202,7 +2197,6 @@ static int ud_interception(struct vcpu_svm *svm)
 {
        int er;
 
-       WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
        er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
        if (er == EMULATE_USER_EXIT)
                return 0;
@@ -4985,6 +4979,25 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
                "mov %%r13, %c[r13](%[svm]) \n\t"
                "mov %%r14, %c[r14](%[svm]) \n\t"
                "mov %%r15, %c[r15](%[svm]) \n\t"
+#endif
+               /*
+                * Clear host registers marked as clobbered to prevent
+                * speculative use.
+                */
+               "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
+               "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
+               "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
+               "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+               "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
+#ifdef CONFIG_X86_64
+               "xor %%r8, %%r8 \n\t"
+               "xor %%r9, %%r9 \n\t"
+               "xor %%r10, %%r10 \n\t"
+               "xor %%r11, %%r11 \n\t"
+               "xor %%r12, %%r12 \n\t"
+               "xor %%r13, %%r13 \n\t"
+               "xor %%r14, %%r14 \n\t"
+               "xor %%r15, %%r15 \n\t"
 #endif
                "pop %%" _ASM_BP
                :
@@ -5015,6 +5028,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
                );
 
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
 #ifdef CONFIG_X86_64
        wrmsrl(MSR_GS_BASE, svm->host.gs_base);
 #else
index 8eba631c4dbd509d8687c6135e8dba267042f5e0..c829d89e2e63f85bc36c6821b0ca1d7712297043 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
 #include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -899,8 +900,16 @@ static inline short vmcs_field_to_offset(unsigned long field)
 {
        BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
 
-       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
-           vmcs_field_to_offset_table[field] == 0)
+       if (field >= ARRAY_SIZE(vmcs_field_to_offset_table))
+               return -ENOENT;
+
+       /*
+        * FIXME: Mitigation for CVE-2017-5753.  To be replaced with a
+        * generic mechanism.
+        */
+       asm("lfence");
+
+       if (vmcs_field_to_offset_table[field] == 0)
                return -ENOENT;
 
        return vmcs_field_to_offset_table[field];
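
The lfence above is a speculation barrier: without it, a mispredicted bounds check lets the table load execute speculatively with an attacker-chosen index (Spectre variant 1). A minimal user-space sketch of the same check-then-fence pattern, assuming x86 with GCC/Clang inline asm and an illustrative table:

    #include <stdio.h>
    #include <stddef.h>

    static const short offset_table[64] = { 4, 8, 12 /* illustrative */ };

    /* Bounds check followed by a speculation barrier, mirroring
     * vmcs_field_to_offset() above. */
    static short lookup(size_t field)
    {
        if (field >= sizeof(offset_table) / sizeof(offset_table[0]))
            return -1;

        /* The CPU may not issue the dependent load below until the
         * bounds check has actually retired. */
        asm volatile("lfence" ::: "memory");

        return offset_table[field];
    }

    int main(void)
    {
        printf("%d %d\n", lookup(1), lookup(1000));
        return 0;
    }
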
@@ -1887,7 +1896,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
        u32 eb;
 
-       eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) |
+       eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
             (1u << DB_VECTOR) | (1u << AC_VECTOR);
        if ((vcpu->guest_debug &
             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
@@ -1905,8 +1914,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
         */
        if (is_guest_mode(vcpu))
                eb |= get_vmcs12(vcpu)->exception_bitmap;
-       else
-               eb |= 1u << UD_VECTOR;
 
        vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -2302,7 +2309,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                 * processors.  See 22.2.4.
                 */
                vmcs_writel(HOST_TR_BASE,
-                           (unsigned long)this_cpu_ptr(&cpu_tss));
+                           (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
                vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt);   /* 22.2.4 */
 
                /*
@@ -5917,7 +5924,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
                return 1;  /* already handled by vmx_vcpu_run() */
 
        if (is_invalid_opcode(intr_info)) {
-               WARN_ON_ONCE(is_guest_mode(vcpu));
                er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
                if (er == EMULATE_USER_EXIT)
                        return 0;
@@ -9415,6 +9421,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                /* Save guest registers, load host registers, keep flags */
                "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
                "pop %0 \n\t"
+               "setbe %c[fail](%0)\n\t"
                "mov %%" _ASM_AX ", %c[rax](%0) \n\t"
                "mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
                __ASM_SIZE(pop) " %c[rcx](%0) \n\t"
@@ -9431,12 +9438,23 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
                "mov %%r13, %c[r13](%0) \n\t"
                "mov %%r14, %c[r14](%0) \n\t"
                "mov %%r15, %c[r15](%0) \n\t"
+               "xor %%r8d,  %%r8d \n\t"
+               "xor %%r9d,  %%r9d \n\t"
+               "xor %%r10d, %%r10d \n\t"
+               "xor %%r11d, %%r11d \n\t"
+               "xor %%r12d, %%r12d \n\t"
+               "xor %%r13d, %%r13d \n\t"
+               "xor %%r14d, %%r14d \n\t"
+               "xor %%r15d, %%r15d \n\t"
 #endif
                "mov %%cr2, %%" _ASM_AX "   \n\t"
                "mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
+               "xor %%eax, %%eax \n\t"
+               "xor %%ebx, %%ebx \n\t"
+               "xor %%esi, %%esi \n\t"
+               "xor %%edi, %%edi \n\t"
                "pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
-               "setbe %c[fail](%0) \n\t"
                ".pushsection .rodata \n\t"
                ".global vmx_return \n\t"
                "vmx_return: " _ASM_PTR " 2b \n\t"
@@ -9473,6 +9491,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
              );
 
+       /* Eliminate branch target predictions from guest mode */
+       vmexit_fill_RSB();
+
        /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
        if (debugctlmsr)
                update_debugctlmsr(debugctlmsr);
index faf843c9b916ead0992d0b155a138c6afdf7ae57..1cec2c62a0b08405d2bd7c8908d6b7f33de3b63c 100644 (file)
@@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
                                         addr, n, v))
                    && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
                handled += n;
                addr += n;
                len -= n;
@@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
 {
        if (vcpu->mmio_read_completed) {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
-                              vcpu->mmio_fragments[0].gpa, *(u64 *)val);
+                              vcpu->mmio_fragments[0].gpa, val);
                vcpu->mmio_read_completed = 0;
                return 1;
        }
@@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
        return vcpu_mmio_write(vcpu, gpa, bytes, val);
 }
 
 static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
                          void *val, int bytes)
 {
-       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
+       trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
        return X86EMUL_IO_NEEDED;
 }
 
@@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
-       struct fpu *fpu = &current->thread.fpu;
        int r;
 
-       fpu__initialize(fpu);
-
        kvm_sigset_activate(vcpu);
 
+       kvm_load_guest_fpu(vcpu);
+
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                        r = -EINTR;
@@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                }
        }
 
-       kvm_load_guest_fpu(vcpu);
-
        if (unlikely(vcpu->arch.complete_userspace_io)) {
                int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
                vcpu->arch.complete_userspace_io = NULL;
                r = cui(vcpu);
                if (r <= 0)
-                       goto out_fpu;
+                       goto out;
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
@@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        else
                r = vcpu_run(vcpu);
 
-out_fpu:
-       kvm_put_guest_fpu(vcpu);
 out:
+       kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
 
@@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 #endif
 
        kvm_rip_write(vcpu, regs->rip);
-       kvm_set_rflags(vcpu, regs->rflags);
+       kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
 
        vcpu->arch.exception.pending = false;
 
@@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
 
+int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+       if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
+               /*
+                * When EFER.LME and CR0.PG are set, the processor is in
+                * 64-bit mode (though maybe in a 32-bit code segment).
+                * CR4.PAE and EFER.LMA must be set.
+                */
+               if (!(sregs->cr4 & X86_CR4_PAE)
+                   || !(sregs->efer & EFER_LMA))
+                       return -EINVAL;
+       } else {
+               /*
+                * Not in 64-bit mode: EFER.LMA is clear and the code
+                * segment cannot be 64-bit.
+                */
+               if (sregs->efer & EFER_LMA || sregs->cs.l)
+                       return -EINVAL;
+       }
+
+       return 0;
+}
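
Spelled out, the validator above accepts exactly these combinations (read directly off the checks):

    EFER.LME=1 and CR0.PG=1  ->  valid only with CR4.PAE=1 and EFER.LMA=1  (long mode)
    anything else            ->  valid only with EFER.LMA=0 and CS.L=0     (legacy/compat)
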
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
@@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                        (sregs->cr4 & X86_CR4_OSXSAVE))
                return -EINVAL;
 
+       if (kvm_valid_sregs(vcpu, sregs))
+               return -EINVAL;
+
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
index 7b181b61170e769fc41a062a3eddbb62c4e53793..f23934bbaf4ebe6a55331669160d6b0aa72ba5d4 100644 (file)
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
index 4d34bb548b41ebdddc20fcf464aad4cb44c6a763..46e71a74e6129b861c233ccf86f452b485e62d59 100644 (file)
@@ -29,7 +29,8 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/export.h>
-                               
+#include <asm/nospec-branch.h>
+
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
  */
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
        negl %ebx
        lea 45f(%ebx,%ebx,2), %ebx
        testl %esi, %esi
-       jmp *%ebx
+       JMP_NOSPEC %ebx
 
        # Handle 2-byte-aligned regions
 20:    addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
        andl $-32,%edx
        lea 3f(%ebx,%ebx), %ebx
        testl %esi, %esi 
-       jmp *%ebx
+       JMP_NOSPEC %ebx
 1:     addl $64,%esi
        addl $64,%edi 
        SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
index 553f8fd23cc4733d0edafa862b95446f7a04bab1..4846eff7e4c8b1505501d7f1dcb64127d0a4c67c 100644 (file)
@@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
                delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
 
                /*
-                * Use cpu_tss as a cacheline-aligned, seldomly
+                * Use cpu_tss_rw as a cacheline-aligned, seldom
                 * accessed per-cpu variable as the monitor target.
                 */
-               __monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
+               __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
 
                /*
                 * AMD, like Intel, supports the EAX hint and EAX=0xf
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644 (file)
index 0000000..cb45c6c
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+       .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86_indirect_thunk_\reg)
+       CFI_STARTPROC
+       JMP_NOSPEC %\reg
+       CFI_ENDPROC
+ENDPROC(__x86_indirect_thunk_\reg)
+.endm
+
+/*
+ * Despite being an assembler file we can't just use .irp here
+ * because __KSYM_DEPS__ only uses the C preprocessor and would
+ * only see one instance of "__x86_indirect_thunk_\reg" rather
+ * than one per register with the correct names. So we do it
+ * the simple and nasty way...
+ */
+#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
+#endif
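
Each generated thunk body is the JMP_NOSPEC retpoline sequence: the indirect branch is rerouted through a RET whose speculative prediction is trapped in a pause/lfence loop. A self-contained user-space sketch of the trick — not the kernel macro itself; it assumes x86-64 with GCC/Clang inline asm and jumps to a local label rather than a function:

    #include <stdio.h>

    int main(void)
    {
        void *target;

        /*
         * Retpoline'd jump to local label 6: the RET's prediction comes
         * from the return stack buffer, which points at the harmless
         * pause/lfence loop instead of a poisoned BTB entry.
         */
        asm volatile(
            "lea    6f(%%rip), %0\n\t" /* real branch target            */
            "call   4f\n\t"            /* pushes addr of the trap loop  */
            "3:\n\t"
            "pause\n\t"                /* speculative path spins here   */
            "lfence\n\t"
            "jmp    3b\n\t"
            "4:\n\t"
            "mov    %0, (%%rsp)\n\t"   /* overwrite return address      */
            "ret\n\t"                  /* architecturally jumps to 6:   */
            "6:\n\t"
            : "=&r" (target) : : "memory");

        (void)target;
        puts("reached the target via a retpoline sequence");
        return 0;
    }
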
index 8e13b8cc6bedb0dc84eea64cd80ca6ae39037eaa..27e9e90a8d3572b900ccaacae1623de803072b17 100644 (file)
@@ -10,7 +10,7 @@ CFLAGS_REMOVE_mem_encrypt.o   = -pg
 endif
 
 obj-y  :=  init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-           pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+           pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
@@ -41,9 +41,10 @@ obj-$(CONFIG_AMD_NUMA)               += amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)                += srat.o
 obj-$(CONFIG_NUMA_EMU)         += numa_emulation.o
 
-obj-$(CONFIG_X86_INTEL_MPX)    += mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX)                    += mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY)                 += kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION)             += pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)  += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644 (file)
index 0000000..b9283cc
--- /dev/null
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+       [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+struct cpu_entry_area *get_cpu_entry_area(int cpu)
+{
+       unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+       BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
+
+       return (struct cpu_entry_area *) va;
+}
+EXPORT_SYMBOL(get_cpu_entry_area);
+
+void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
+{
+       unsigned long va = (unsigned long) cea_vaddr;
+
+       set_pte_vaddr(va, pfn_pte(pa >> PAGE_SHIFT, flags));
+}
+
+static void __init
+cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
+{
+       for ( ; pages; pages--, cea_vaddr += PAGE_SIZE, ptr += PAGE_SIZE)
+               cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+       int npages;
+       void *cea;
+
+       if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+               return;
+
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+       npages = sizeof(struct debug_store) / PAGE_SIZE;
+       BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+                            PAGE_KERNEL);
+
+       cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+       /*
+        * Force the population of PMDs for not yet allocated per cpu
+        * memory like debug store buffers.
+        */
+       npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+       for (; npages; npages--, cea += PAGE_SIZE)
+               cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+       extern char _entry_trampoline[];
+
+       /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+       pgprot_t gdt_prot = PAGE_KERNEL_RO;
+       pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+       /*
+        * On native 32-bit systems, the GDT cannot be read-only because
+        * our double fault handler uses a task gate, and entering through
+        * a task gate needs to change an available TSS to busy.  If the
+        * GDT is read-only, that will triple fault.  The TSS cannot be
+        * read-only because the CPU writes to it on task switches.
+        *
+        * On Xen PV, the GDT must be read-only because the hypervisor
+        * requires it.
+        */
+       pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+               PAGE_KERNEL_RO : PAGE_KERNEL;
+       pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
+                   gdt_prot);
+
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+                            per_cpu_ptr(&entry_stack_storage, cpu), 1,
+                            PAGE_KERNEL);
+
+       /*
+        * The Intel SDM says (Volume 3, 7.2.1):
+        *
+        *  Avoid placing a page boundary in the part of the TSS that the
+        *  processor reads during a task switch (the first 104 bytes). The
+        *  processor may not correctly perform address translations if a
+        *  boundary occurs in this area. During a task switch, the processor
+        *  reads and writes into the first 104 bytes of each TSS (using
+        *  contiguous physical addresses beginning with the physical address
+        *  of the first byte of the TSS). So, after TSS access begins, if
+        *  part of the 104 bytes is not physically contiguous, the processor
+        *  will access incorrect information without generating a page-fault
+        *  exception.
+        *
+        * There are also a lot of errata involving the TSS spanning a page
+        * boundary.  Assert that we're not doing that.
+        */
+       BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+                     offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+       BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
+                            &per_cpu(cpu_tss_rw, cpu),
+                            sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
+
+#ifdef CONFIG_X86_32
+       per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+       BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+       BUILD_BUG_ON(sizeof(exception_stacks) !=
+                    sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+       cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
+                            &per_cpu(exception_stacks, cpu),
+                            sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
+
+       cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
+                    __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+       percpu_setup_debug_store(cpu);
+}
+
+static __init void setup_cpu_entry_area_ptes(void)
+{
+#ifdef CONFIG_X86_32
+       unsigned long start, end;
+
+       BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+       BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
+
+       start = CPU_ENTRY_AREA_BASE;
+       end = start + CPU_ENTRY_AREA_MAP_SIZE;
+
+       /* Careful here: start + PMD_SIZE might wrap around */
+       for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
+               populate_extra_pte(start);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+       unsigned int cpu;
+
+       setup_cpu_entry_area_ptes();
+
+       for_each_possible_cpu(cpu)
+               setup_cpu_entry_area(cpu);
+}
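
Because every CPU's entry area lives at a fixed stride inside one dedicated VA region, get_cpu_entry_area() is pure arithmetic. A stand-alone sketch, with _Static_assert standing in for BUILD_BUG_ON and illustrative placeholder constants (the real values come from asm/cpu_entry_area.h):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    /* Illustrative placeholders, not the kernel's real values. */
    #define CPU_ENTRY_AREA_PER_CPU 0xfffffe0000001000UL
    #define CPU_ENTRY_AREA_SIZE    (40 * PAGE_SIZE)

    /* Same fixed-stride lookup as get_cpu_entry_area(). */
    static unsigned long cpu_entry_area_va(unsigned int cpu)
    {
        return CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
    }

    int main(void)
    {
        _Static_assert(CPU_ENTRY_AREA_SIZE % PAGE_SIZE == 0,
                       "entry area must be a whole number of pages");

        for (unsigned int cpu = 0; cpu < 4; cpu++)
            printf("cpu %u -> %#lx\n", cpu, cpu_entry_area_va(cpu));
        return 0;
    }
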
index bfcffdf6c5775f7ac5bd4c9f768573d7ae7bba55..421f2664ffa06e6cd4b31e5d6521648add3e2c4e 100644 (file)
@@ -5,7 +5,7 @@
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-       ptdump_walk_pgd_level(m, NULL);
+       ptdump_walk_pgd_level_debugfs(m, NULL, false);
        return 0;
 }
 
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
        .release        = single_release,
 };
 
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+       if (current->mm->pgd) {
+               down_read(&current->mm->mmap_sem);
+               ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+               up_read(&current->mm->mmap_sem);
+       }
+       return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ptdump_open_curknl,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+       if (current->mm->pgd) {
+               down_read(&current->mm->mmap_sem);
+               ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+               up_read(&current->mm->mmap_sem);
+       }
+       return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+       return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+       .owner          = THIS_MODULE,
+       .open           = ptdump_open_curusr,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
 
 static int __init pt_dump_debug_init(void)
 {
-       pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
-                                &ptdump_fops);
-       if (!pe)
+       dir = debugfs_create_dir("page_tables", NULL);
+       if (!dir)
                return -ENOMEM;
 
+       pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+                                    &ptdump_fops);
+       if (!pe_knl)
+               goto err;
+
+       pe_curknl = debugfs_create_file("current_kernel", 0400,
+                                       dir, NULL, &ptdump_curknl_fops);
+       if (!pe_curknl)
+               goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       pe_curusr = debugfs_create_file("current_user", 0400,
+                                       dir, NULL, &ptdump_curusr_fops);
+       if (!pe_curusr)
+               goto err;
+#endif
        return 0;
+err:
+       debugfs_remove_recursive(dir);
+       return -ENOMEM;
 }
 
 static void __exit pt_dump_debug_exit(void)
 {
-       debugfs_remove_recursive(pe);
+       debugfs_remove_recursive(dir);
 }
 
 module_init(pt_dump_debug_init);
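
For reference, with debugfs mounted at its conventional location the new hierarchy is read like this (all files are root-only, mode 0400; the last one exists only with CONFIG_PAGE_TABLE_ISOLATION):

    # mount -t debugfs none /sys/kernel/debug    # if not already mounted
    # cat /sys/kernel/debug/page_tables/kernel
    # cat /sys/kernel/debug/page_tables/current_kernel
    # cat /sys/kernel/debug/page_tables/current_user
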
index 5e3ac6fe6c9e32ed1906f4f9bf736310a7193c7d..2a4849e92831b0f9771a2742b01c5a6c9c806a42 100644 (file)
@@ -44,68 +44,97 @@ struct addr_marker {
        unsigned long max_lines;
 };
 
-/* indices for address_markers; keep sync'd w/ address_markers below */
+/* Address space markers hints */
+
+#ifdef CONFIG_X86_64
+
 enum address_markers_idx {
        USER_SPACE_NR = 0,
-#ifdef CONFIG_X86_64
        KERNEL_SPACE_NR,
        LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+       LDT_NR,
+#endif
        VMALLOC_START_NR,
        VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
        KASAN_SHADOW_START_NR,
        KASAN_SHADOW_END_NR,
 #endif
-# ifdef CONFIG_X86_ESPFIX64
+       CPU_ENTRY_AREA_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+       LDT_NR,
+#endif
+#ifdef CONFIG_X86_ESPFIX64
        ESPFIX_START_NR,
-# endif
+#endif
+#ifdef CONFIG_EFI
+       EFI_END_NR,
+#endif
        HIGH_KERNEL_NR,
        MODULES_VADDR_NR,
        MODULES_END_NR,
-#else
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
+};
+
+static struct addr_marker address_markers[] = {
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { (1UL << 63),        "Kernel Space" },
+       [LOW_KERNEL_NR]         = { 0UL,                "Low Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMEMMAP_START_NR]      = { 0UL,                "Vmemmap" },
+#ifdef CONFIG_KASAN
+       [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
+       [KASAN_SHADOW_END_NR]   = { KASAN_SHADOW_END,   "KASAN shadow end" },
+#endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+       [LDT_NR]                = { LDT_BASE_ADDR,      "LDT remap" },
+#endif
+       [CPU_ENTRY_AREA_NR]     = { CPU_ENTRY_AREA_BASE, "CPU entry Area" },
+#ifdef CONFIG_X86_ESPFIX64
+       [ESPFIX_START_NR]       = { ESPFIX_BASE_ADDR,   "ESPfix Area", 16 },
+#endif
+#ifdef CONFIG_EFI
+       [EFI_END_NR]            = { EFI_VA_END,         "EFI Runtime Services" },
+#endif
+       [HIGH_KERNEL_NR]        = { __START_KERNEL_map, "High Kernel Mapping" },
+       [MODULES_VADDR_NR]      = { MODULES_VADDR,      "Modules" },
+       [MODULES_END_NR]        = { MODULES_END,        "End Modules" },
+       [FIXADDR_START_NR]      = { FIXADDR_START,      "Fixmap Area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
+};
+
+#else /* CONFIG_X86_64 */
+
+enum address_markers_idx {
+       USER_SPACE_NR = 0,
        KERNEL_SPACE_NR,
        VMALLOC_START_NR,
        VMALLOC_END_NR,
-# ifdef CONFIG_HIGHMEM
+#ifdef CONFIG_HIGHMEM
        PKMAP_BASE_NR,
-# endif
-       FIXADDR_START_NR,
 #endif
+       CPU_ENTRY_AREA_NR,
+       FIXADDR_START_NR,
+       END_OF_SPACE_NR,
 };
 
-/* Address space markers hints */
 static struct addr_marker address_markers[] = {
-       { 0, "User Space" },
-#ifdef CONFIG_X86_64
-       { 0x8000000000000000UL, "Kernel Space" },
-       { 0/* PAGE_OFFSET */,   "Low Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/* VMEMMAP_START */, "Vmemmap" },
-#ifdef CONFIG_KASAN
-       { KASAN_SHADOW_START,   "KASAN shadow" },
-       { KASAN_SHADOW_END,     "KASAN shadow end" },
+       [USER_SPACE_NR]         = { 0,                  "User Space" },
+       [KERNEL_SPACE_NR]       = { PAGE_OFFSET,        "Kernel Mapping" },
+       [VMALLOC_START_NR]      = { 0UL,                "vmalloc() Area" },
+       [VMALLOC_END_NR]        = { 0UL,                "vmalloc() End" },
+#ifdef CONFIG_HIGHMEM
+       [PKMAP_BASE_NR]         = { 0UL,                "Persistent kmap() Area" },
 #endif
-# ifdef CONFIG_X86_ESPFIX64
-       { ESPFIX_BASE_ADDR,     "ESPfix Area", 16 },
-# endif
-# ifdef CONFIG_EFI
-       { EFI_VA_END,           "EFI Runtime Services" },
-# endif
-       { __START_KERNEL_map,   "High Kernel Mapping" },
-       { MODULES_VADDR,        "Modules" },
-       { MODULES_END,          "End Modules" },
-#else
-       { PAGE_OFFSET,          "Kernel Mapping" },
-       { 0/* VMALLOC_START */, "vmalloc() Area" },
-       { 0/*VMALLOC_END*/,     "vmalloc() End" },
-# ifdef CONFIG_HIGHMEM
-       { 0/*PKMAP_BASE*/,      "Persistent kmap() Area" },
-# endif
-       { 0/*FIXADDR_START*/,   "Fixmap Area" },
-#endif
-       { -1, NULL }            /* End of list */
+       [CPU_ENTRY_AREA_NR]     = { 0UL,                "CPU entry area" },
+       [FIXADDR_START_NR]      = { 0UL,                "Fixmap area" },
+       [END_OF_SPACE_NR]       = { -1,                 NULL }
 };
 
+#endif /* !CONFIG_X86_64 */
+
 /* Multipliers for offsets within the PTEs */
 #define PTE_LEVEL_MULT (PAGE_SIZE)
 #define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
@@ -140,7 +169,7 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg)
        static const char * const level_name[] =
                { "cr3", "pgd", "p4d", "pud", "pmd", "pte" };
 
-       if (!pgprot_val(prot)) {
+       if (!(pr & _PAGE_PRESENT)) {
                /* Not present */
                pt_dump_cont_printf(m, dmsg, "                              ");
        } else {
@@ -447,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
 }
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
-                                      bool checkwx)
+                                      bool checkwx, bool dmesg)
 {
 #ifdef CONFIG_X86_64
        pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -460,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
        if (pgd) {
                start = pgd;
-               st.to_dmesg = true;
+               st.to_dmesg = dmesg;
        }
 
        st.check_wx = checkwx;
@@ -498,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 {
-       ptdump_walk_pgd_level_core(m, pgd, false);
+       ptdump_walk_pgd_level_core(m, pgd, false, true);
+}
+
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       if (user && static_cpu_has(X86_FEATURE_PTI))
+               pgd = kernel_to_user_pgdp(pgd);
+#endif
+       ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+       pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       pr_info("x86/mm: Checking user space page tables\n");
+       pgd = kernel_to_user_pgdp(pgd);
+       ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
 }
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
 
 void ptdump_walk_pgd_level_checkwx(void)
 {
-       ptdump_walk_pgd_level_core(NULL, NULL, true);
+       ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+       ptdump_walk_user_pgd_level_checkwx();
 }
 
 static int __init pt_dump_init(void)
@@ -525,8 +578,8 @@ static int __init pt_dump_init(void)
        address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
 # endif
        address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+       address_markers[CPU_ENTRY_AREA_NR].start_address = CPU_ENTRY_AREA_BASE;
 #endif
-
        return 0;
 }
 __initcall(pt_dump_init);
index febf6980e6535572f998cf2fa0ee63d296bdc6f1..06fe3d51d385b88111961c0b5addc673fcd597a2 100644 (file)
@@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), address,
                (void *)regs->ip, (void *)regs->sp, error_code);
index 6fdf91ef130a4737ab434ce98f77b2583fe1840d..82f5252c723a4a544593067981d90191c7b22c1d 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
+#include <asm/pti.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -160,6 +161,12 @@ struct map_range {
 
 static int page_size_mask;
 
+static void enable_global_pages(void)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               __supported_pte_mask |= _PAGE_GLOBAL;
+}
+
 static void __init probe_page_size_mask(void)
 {
        /*
@@ -177,11 +184,11 @@ static void __init probe_page_size_mask(void)
                cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
        /* Enable PGE if available */
+       __supported_pte_mask &= ~_PAGE_GLOBAL;
        if (boot_cpu_has(X86_FEATURE_PGE)) {
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
-               __supported_pte_mask |= _PAGE_GLOBAL;
-       } else
-               __supported_pte_mask &= ~_PAGE_GLOBAL;
+               enable_global_pages();
+       }
 
        /* Enable 1 GB linear kernel mappings if available: */
        if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -194,34 +201,44 @@ static void __init probe_page_size_mask(void)
 
 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-       if (boot_cpu_has(X86_FEATURE_PCID)) {
-               if (boot_cpu_has(X86_FEATURE_PGE)) {
-                       /*
-                        * This can't be cr4_set_bits_and_update_boot() --
-                        * the trampoline code can't handle CR4.PCIDE and
-                        * it wouldn't do any good anyway.  Despite the name,
-                        * cr4_set_bits_and_update_boot() doesn't actually
-                        * cause the bits in question to remain set all the
-                        * way through the secondary boot asm.
-                        *
-                        * Instead, we brute-force it and set CR4.PCIDE
-                        * manually in start_secondary().
-                        */
-                       cr4_set_bits(X86_CR4_PCIDE);
-               } else {
-                       /*
-                        * flush_tlb_all(), as currently implemented, won't
-                        * work if PCID is on but PGE is not.  Since that
-                        * combination doesn't exist on real hardware, there's
-                        * no reason to try to fully support it, but it's
-                        * polite to avoid corrupting data if we're on
-                        * an improperly configured VM.
-                        */
-                       setup_clear_cpu_cap(X86_FEATURE_PCID);
-               }
+       if (!IS_ENABLED(CONFIG_X86_64))
+               return;
+
+       if (!boot_cpu_has(X86_FEATURE_PCID))
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_PGE)) {
+               /*
+                * This can't be cr4_set_bits_and_update_boot() -- the
+                * trampoline code can't handle CR4.PCIDE and it wouldn't
+                * do any good anyway.  Despite the name,
+                * cr4_set_bits_and_update_boot() doesn't actually cause
+                * the bits in question to remain set all the way through
+                * the secondary boot asm.
+                *
+                * Instead, we brute-force it and set CR4.PCIDE manually in
+                * start_secondary().
+                */
+               cr4_set_bits(X86_CR4_PCIDE);
+
+               /*
+                * INVPCID's single-context modes (2/3) only work if we set
+                * X86_CR4_PCIDE, *and* we have INVPCID support.  It's unusable
+                * on systems that have X86_CR4_PCIDE clear, or that have
+                * no INVPCID support at all.
+                */
+               if (boot_cpu_has(X86_FEATURE_INVPCID))
+                       setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+       } else {
+               /*
+                * flush_tlb_all(), as currently implemented, won't work if
+                * PCID is on but PGE is not.  Since that combination
+                * doesn't exist on real hardware, there's no reason to try
+                * to fully support it, but it's polite to avoid corrupting
+                * data if we're on an improperly configured VM.
+                */
+               setup_clear_cpu_cap(X86_FEATURE_PCID);
        }
-#endif
 }
 
 #ifdef CONFIG_X86_32
@@ -622,6 +639,7 @@ void __init init_mem_mapping(void)
 {
        unsigned long end;
 
+       pti_check_boottime_disable();
        probe_page_size_mask();
        setup_pcid();
 
@@ -845,12 +863,12 @@ void __init zone_sizes_init(void)
        free_area_init_nodes(max_zone_pfns);
 }
 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
        .loaded_mm = &init_mm,
        .next_asid = 1,
        .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
 };
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);
 
 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
 {
index 8a64a6f2848d9be2e73a341f4d87ab2dc35de09f..135c9a7898c7da908f1340f9750774b4327e63b3 100644 (file)
@@ -50,6 +50,7 @@
 #include <asm/setup.h>
 #include <asm/set_memory.h>
 #include <asm/page_types.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/init.h>
 
 #include "mm_internal.h"
@@ -766,6 +767,7 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
        printk(KERN_INFO "virtual kernel memory layout:\n"
                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+               "  cpu_entry : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #ifdef CONFIG_HIGHMEM
                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
 #endif
@@ -777,6 +779,10 @@ void __init mem_init(void)
                FIXADDR_START, FIXADDR_TOP,
                (FIXADDR_TOP - FIXADDR_START) >> 10,
 
+               CPU_ENTRY_AREA_BASE,
+               CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE,
+               CPU_ENTRY_AREA_MAP_SIZE >> 10,
+
 #ifdef CONFIG_HIGHMEM
                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
                (LAST_PKMAP*PAGE_SIZE) >> 10,
index 99dfed6dfef8b2f9028f82b89ab8dc2bde8173c4..47388f0c0e59649ca3574d4e7c31b356dad7d247 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
+#include <asm/cpu_entry_area.h>
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
@@ -277,6 +278,7 @@ void __init kasan_early_init(void)
 void __init kasan_init(void)
 {
        int i;
+       void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
 
 #ifdef CONFIG_KASAN_INLINE
        register_die_notifier(&kasan_die_notifier);
@@ -321,16 +323,33 @@ void __init kasan_init(void)
                map_range(&pfn_mapped[i]);
        }
 
+       shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
+       shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
+       shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
+                                               PAGE_SIZE);
+
+       shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+                                       CPU_ENTRY_AREA_MAP_SIZE);
+       shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
+       shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
+                                       PAGE_SIZE);
+
        kasan_populate_zero_shadow(
                kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
-               kasan_mem_to_shadow((void *)__START_KERNEL_map));
+               shadow_cpu_entry_begin);
+
+       kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
+                             (unsigned long)shadow_cpu_entry_end, 0);
+
+       kasan_populate_zero_shadow(shadow_cpu_entry_end,
+                               kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
        kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
                              (unsigned long)kasan_mem_to_shadow(_end),
                              early_pfn_to_nid(__pa(_stext)));
 
        kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
-                       (void *)KASAN_SHADOW_END);
+                               (void *)KASAN_SHADOW_END);
 
        load_cr3(init_top_pgt);
        __flush_tlb_all();
index 879ef930e2c2b04d60c85efec9f6e0fa5555fa2a..aedebd2ebf1ead0ff2b1b17816ff80d9fce88b8f 100644 (file)
 #define TB_SHIFT 40
 
 /*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
  *
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
  */
 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
 
 /* Default values */
 unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
        unsigned long remain_entropy;
 
        /*
-        * All these BUILD_BUG_ON checks ensures the memory layout is
-        * consistent with the vaddr_start/vaddr_end variables.
+        * These BUILD_BUG_ON checks ensure the memory layout is consistent
+        * with the vaddr_start/vaddr_end variables. These checks are very
+        * limited.
         */
        BUILD_BUG_ON(vaddr_start >= vaddr_end);
-       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-                    vaddr_end >= EFI_VA_END);
-       BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
-                     IS_ENABLED(CONFIG_EFI)) &&
-                    vaddr_end >= __START_KERNEL_map);
+       BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
        BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
        if (!kaslr_memory_enabled())
index d9a9e9fc75dd7b511050adc6bd4c914b0c4fcdaa..391b13402e403041222ef8e6bd22f60edaeb9446 100644 (file)
@@ -405,13 +405,13 @@ bool sme_active(void)
 {
        return sme_me_mask && !sev_enabled;
 }
-EXPORT_SYMBOL_GPL(sme_active);
+EXPORT_SYMBOL(sme_active);
 
 bool sev_active(void)
 {
        return sme_me_mask && sev_enabled;
 }
-EXPORT_SYMBOL_GPL(sev_active);
+EXPORT_SYMBOL(sev_active);
 
 static const struct dma_map_ops sev_dma_ops = {
        .alloc                  = sev_alloc,
index 96d456a94b0342eb967f918e4233498ac24e4349..004abf9ebf1222c169448090f7f1c570635bce41 100644 (file)
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
                kmem_cache_free(pgd_cache, pgd);
 }
 #else
+
 static inline pgd_t *_pgd_alloc(void)
 {
-       return (pgd_t *)__get_free_page(PGALLOC_GFP);
+       return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
 }
 
 static inline void _pgd_free(pgd_t *pgd)
 {
-       free_page((unsigned long)pgd);
+       free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
 }
 #endif /* CONFIG_X86_PAE */
 
index 6b9bf023a700559b87ae7ac89570d9bbd26d1f05..c3c5274410a908e762aed936406006d63c3116ac 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
new file mode 100644 (file)
index 0000000..ce38f16
--- /dev/null
@@ -0,0 +1,368 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This code is based in part on work published here:
+ *
+ *     https://github.com/IAIK/KAISER
+ *
+ * The original work was written and signed off for the Linux
+ * kernel by:
+ *
+ *   Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
+ *   Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
+ *   Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
+ *   Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
+ *
+ * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
+ * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
+ *                    Andy Lutomirski <luto@amacapital.net>
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+
+#include <asm/cpufeature.h>
+#include <asm/hypervisor.h>
+#include <asm/vsyscall.h>
+#include <asm/cmdline.h>
+#include <asm/pti.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "Kernel/User page tables isolation: " fmt
+
+/* Backporting helper */
+#ifndef __GFP_NOTRACK
+#define __GFP_NOTRACK  0
+#endif
+
+static void __init pti_print_if_insecure(const char *reason)
+{
+       if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               pr_info("%s\n", reason);
+}
+
+static void __init pti_print_if_secure(const char *reason)
+{
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               pr_info("%s\n", reason);
+}
+
+void __init pti_check_boottime_disable(void)
+{
+       char arg[5];
+       int ret;
+
+       if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
+               pti_print_if_insecure("disabled on XEN PV.");
+               return;
+       }
+
+       ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+       if (ret > 0)  {
+               if (ret == 3 && !strncmp(arg, "off", 3)) {
+                       pti_print_if_insecure("disabled on command line.");
+                       return;
+               }
+               if (ret == 2 && !strncmp(arg, "on", 2)) {
+                       pti_print_if_secure("force enabled on command line.");
+                       goto enable;
+               }
+               if (ret == 4 && !strncmp(arg, "auto", 4))
+                       goto autosel;
+       }
+
+       if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+               pti_print_if_insecure("disabled on command line.");
+               return;
+       }
+
+autosel:
+       if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+               return;
+enable:
+       setup_force_cpu_cap(X86_FEATURE_PTI);
+}
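
As a quick reference, the boot-time matrix implemented above is:

    pti=off    force-disabled, even on CPUs with X86_BUG_CPU_MELTDOWN
    pti=on     force-enabled, even on CPUs without the bug
    pti=auto   enabled only when the CPU is affected (also the default)
    nopti      legacy spelling of pti=off
    Xen PV     always disabled, decided before the command line is parsed
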
+
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       /*
+        * Changes to the high (kernel) portion of the kernelmode page
+        * tables are not automatically propagated to the usermode tables.
+        *
+        * Users should keep in mind that, unlike the kernelmode tables,
+        * there is no vmalloc_fault equivalent for the usermode tables.
+        * Top-level entries added to init_mm's usermode pgd after boot
+        * will not be automatically propagated to other mms.
+        */
+       if (!pgdp_maps_userspace(pgdp))
+               return pgd;
+
+       /*
+        * The user page tables get the full PGD, accessible from
+        * userspace:
+        */
+       kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+       /*
+        * If this is normal user memory, make it NX in the kernel
+        * pagetables so that, if we somehow screw up and return to
+        * usermode with the kernel CR3 loaded, we'll get a page fault
+        * instead of allowing user code to execute with the wrong CR3.
+        *
+        * As exceptions, we don't set NX if:
+        *  - _PAGE_USER is not set.  This could be an executable
+        *     EFI runtime mapping or something similar, and the kernel
+        *     may execute from it
+        *  - we don't have NX support
+        *  - we're clearing the PGD (i.e. the new pgd is not present).
+        */
+       if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
+           (__supported_pte_mask & _PAGE_NX))
+               pgd.pgd |= _PAGE_NX;
+
+       /* return the copy of the PGD we want the kernel to use: */
+       return pgd;
+}
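
The helpers used above, pgdp_maps_userspace() and kernel_to_user_pgdp(), are not part of this hunk. The sketch below is a reading of the layout they encode, not the verbatim kernel code: with PTI each mm's PGD becomes an order-1 (8k) allocation (see the _pgd_alloc() change earlier in this merge), kernel copy in the first 4k page and user copy in the second, so converting a pointer between the two views is a single bit flip:

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* Kernel view -> user view: the user PGD is the next 4k page. */
    static inline void *kernel_to_user_pgdp_sketch(void *pgdp)
    {
        return (void *)((unsigned long)pgdp | PAGE_SIZE);
    }

    /* Entries in the low half of a 4k PGD page cover user addresses. */
    static inline int pgdp_maps_userspace_sketch(const void *pgdp)
    {
        return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
    }
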
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a P4D on success, or NULL on failure.
+ */
+static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
+{
+       pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+
+       if (address < PAGE_OFFSET) {
+               WARN_ONCE(1, "attempt to walk user address\n");
+               return NULL;
+       }
+
+       if (pgd_none(*pgd)) {
+               unsigned long new_p4d_page = __get_free_page(gfp);
+               if (!new_p4d_page)
+                       return NULL;
+
+               set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+       }
+       BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
+       return p4d_offset(pgd, address);
+}
+
+/*
+ * Walk the user copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.
+ *
+ * Returns a pointer to a PMD on success, or NULL on failure.
+ */
+static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
+       pud_t *pud;
+
+       if (!p4d)
+               return NULL;
+
+       BUILD_BUG_ON(p4d_large(*p4d) != 0);
+       if (p4d_none(*p4d)) {
+               unsigned long new_pud_page = __get_free_page(gfp);
+               if (!new_pud_page)
+                       return NULL;
+
+               set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
+       }
+
+       pud = pud_offset(p4d, address);
+       /* The user page tables do not use large mappings: */
+       if (pud_large(*pud)) {
+               WARN_ON(1);
+               return NULL;
+       }
+       if (pud_none(*pud)) {
+               unsigned long new_pmd_page = __get_free_page(gfp);
+               if (!new_pmd_page)
+                       return NULL;
+
+               set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
+       }
+
+       return pmd_offset(pud, address);
+}
+
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+/*
+ * Walk the shadow copy of the page tables (optionally) trying to allocate
+ * page table pages on the way down.  Does not support large pages.
+ *
+ * Note: this is only used when mapping *new* kernel data into the
+ * user/shadow page tables.  It is never used for userspace data.
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+{
+       gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
+       pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
+       pte_t *pte;
+
+       if (!pmd)
+               return NULL;
+
+       /* We can't do anything sensible if we hit a large mapping. */
+       if (pmd_large(*pmd)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (pmd_none(*pmd)) {
+               unsigned long new_pte_page = __get_free_page(gfp);
+               if (!new_pte_page)
+                       return NULL;
+
+               set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
+       }
+
+       pte = pte_offset_kernel(pmd, address);
+       if (pte_flags(*pte) & _PAGE_USER) {
+               WARN_ONCE(1, "attempt to walk to user pte\n");
+               return NULL;
+       }
+       return pte;
+}
+
+static void __init pti_setup_vsyscall(void)
+{
+       pte_t *pte, *target_pte;
+       unsigned int level;
+
+       pte = lookup_address(VSYSCALL_ADDR, &level);
+       if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
+               return;
+
+       target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
+       if (WARN_ON(!target_pte))
+               return;
+
+       *target_pte = *pte;
+       set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
+}
+#else
+static void __init pti_setup_vsyscall(void) { }
+#endif
+
+static void __init
+pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
+{
+       unsigned long addr;
+
+       /*
+        * Clone the populated PMDs which cover start to end. These PMD areas
+        * can have holes.
+        */
+       for (addr = start; addr < end; addr += PMD_SIZE) {
+               pmd_t *pmd, *target_pmd;
+               pgd_t *pgd;
+               p4d_t *p4d;
+               pud_t *pud;
+
+               pgd = pgd_offset_k(addr);
+               if (WARN_ON(pgd_none(*pgd)))
+                       return;
+               p4d = p4d_offset(pgd, addr);
+               if (WARN_ON(p4d_none(*p4d)))
+                       return;
+               pud = pud_offset(p4d, addr);
+               if (pud_none(*pud))
+                       continue;
+               pmd = pmd_offset(pud, addr);
+               if (pmd_none(*pmd))
+                       continue;
+
+               target_pmd = pti_user_pagetable_walk_pmd(addr);
+               if (WARN_ON(!target_pmd))
+                       return;
+
+               /*
+                * Copy the PMD.  That is, the kernelmode and usermode
+                * tables will share the last-level page tables of this
+                * address range.
+                */
+               *target_pmd = pmd_clear_flags(*pmd, clear);
+       }
+}
+
+/*
+ * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
+ * next-level entry on 5-level systems).
+ */
+static void __init pti_clone_p4d(unsigned long addr)
+{
+       p4d_t *kernel_p4d, *user_p4d;
+       pgd_t *kernel_pgd;
+
+       user_p4d = pti_user_pagetable_walk_p4d(addr);
+       if (WARN_ON(!user_p4d))
+               return;
+
+       kernel_pgd = pgd_offset_k(addr);
+       kernel_p4d = p4d_offset(kernel_pgd, addr);
+       *user_p4d = *kernel_p4d;
+}
+
+/*
+ * Clone the CPU_ENTRY_AREA into the user space visible page table.
+ */
+static void __init pti_clone_user_shared(void)
+{
+       pti_clone_p4d(CPU_ENTRY_AREA_BASE);
+}
+
+/*
+ * Clone the ESPFIX P4D into the user space visible page table.
+ */
+static void __init pti_setup_espfix64(void)
+{
+#ifdef CONFIG_X86_ESPFIX64
+       pti_clone_p4d(ESPFIX_BASE_ADDR);
+#endif
+}
+
+/*
+ * Clone the populated PMDs of the entry and irqentry text and force it RO.
+ */
+static void __init pti_clone_entry_text(void)
+{
+       pti_clone_pmds((unsigned long) __entry_text_start,
+                       (unsigned long) __irqentry_text_end,
+                      _PAGE_RW | _PAGE_GLOBAL);
+}
+
+/*
+ * Initialize kernel page table isolation
+ */
+void __init pti_init(void)
+{
+       if (!static_cpu_has(X86_FEATURE_PTI))
+               return;
+
+       pr_info("enabled\n");
+
+       pti_clone_user_shared();
+       pti_clone_entry_text();
+       pti_setup_espfix64();
+       pti_setup_vsyscall();
+}
index 3118392cdf756bfc913d7a4137d5f7e0d46b046d..a1561957dccbb82d188d8209d76f7eddb780519a 100644 (file)
  *     Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts.  We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+       u16 asid;
+
+       /*
+        * This is only expected to be set if we have disabled
+        * kernel _PAGE_GLOBAL pages.
+        */
+       if (!static_cpu_has(X86_FEATURE_PTI)) {
+               WARN_ON_ONCE(1);
+               return;
+       }
+
+       for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+               /* No need to flush the current asid */
+               if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+                       continue;
+               /*
+                * Make sure the next time we go to switch to
+                * this asid, we do a flush:
+                */
+               this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+       }
+       this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
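
For orientation, the per-CPU state consulted here looks roughly like the following (a trimmed sketch; the field names match the code above, everything else is elided):

	struct tlb_context_sketch {
		u64 ctx_id;	/* 0 means invalid: forces a flush on next load */
		u64 tlb_gen;
	};

	struct tlb_state_sketch {
		u16 loaded_mm_asid;		/* ASID of the live mm */
		bool invalidate_other;		/* set by deferred flushes */
		struct tlb_context_sketch ctxs[TLB_NR_DYN_ASIDS];
	};
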
+
 atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
 
 
@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
                return;
        }
 
+       if (this_cpu_read(cpu_tlbstate.invalidate_other))
+               clear_asid_other();
+
        for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
                if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
                    next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
        *need_flush = true;
 }
 
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+       unsigned long new_mm_cr3;
+
+       if (need_flush) {
+               invalidate_user_asid(new_asid);
+               new_mm_cr3 = build_cr3(pgdir, new_asid);
+       } else {
+               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+       }
+
+       /*
+        * Caution: many callers of this function expect
+        * that load_cr3() is serializing and orders TLB
+        * fills with respect to the mm_cpumask writes.
+        */
+       write_cr3(new_mm_cr3);
+}
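
build_cr3() and build_cr3_noflush() are not shown in this hunk; a sketch of what they plausibly produce, assuming PCID is enabled (the hardware PCID is the ASID plus one, and bit 63 of CR3 suppresses the implicit flush on a CR3 write):

	#define CR3_NOFLUSH_SKETCH	(1UL << 63)

	static unsigned long build_cr3_sketch(pgd_t *pgdir, u16 asid, bool noflush)
	{
		unsigned long cr3 = __pa(pgdir) | (asid + 1);

		return noflush ? (cr3 | CR3_NOFLUSH_SKETCH) : cr3;
	}
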
+
 void leave_mm(int cpu)
 {
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -128,7 +182,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                if (need_flush) {
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-                       write_cr3(build_cr3(next, new_asid));
+                       load_new_mm_cr3(next->pgd, new_asid, true);
 
                        /*
                         * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
                } else {
                        /* The new ASID is already up to date. */
-                       write_cr3(build_cr3_noflush(next, new_asid));
+                       load_new_mm_cr3(next->pgd, new_asid, false);
 
                        /* See above wrt _rcuidle. */
                        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
@@ -288,7 +342,7 @@ void initialize_tlbstate_and_flush(void)
                !(cr4_read_shadow() & X86_CR4_PCIDE));
 
        /* Force ASID 0 and force a TLB flush. */
-       write_cr3(build_cr3(mm, 0));
+       write_cr3(build_cr3(mm->pgd, 0));
 
        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
@@ -551,7 +605,7 @@ static void do_kernel_range_flush(void *info)
 
        /* flush range by one by one 'invlpg' */
        for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
-               __flush_tlb_single(addr);
+               __flush_tlb_one(addr);
 }
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
index 7a5350d08cef711a14c29cf1f8fcedb70ecc7465..563049c483a12c5fe587e463fb96bff4dde22c5a 100644 (file)
@@ -594,6 +594,11 @@ char *__init pcibios_setup(char *str)
        } else if (!strcmp(str, "nocrs")) {
                pci_probe |= PCI_ROOT_NO_CRS;
                return NULL;
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+       } else if (!strcmp(str, "big_root_window")) {
+               pci_probe |= PCI_BIG_ROOT_WINDOW;
+               return NULL;
+#endif
        } else if (!strcmp(str, "earlydump")) {
                pci_early_dump_regs = 1;
                return NULL;
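
With this hunk the AMD root-window quirk below becomes opt-in; on a 64-bit physical address build it is enabled from the kernel command line, for example:

	pci=big_root_window
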
index e663d6bf1328ebe2327c990f9d19557a49a2124a..f6a26e3cb4763325465df6ec19efb282e26e41fd 100644 (file)
@@ -662,10 +662,14 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
  */
 static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
 {
-       unsigned i;
        u32 base, limit, high;
-       struct resource *res, *conflict;
        struct pci_dev *other;
+       struct resource *res;
+       unsigned i;
+       int r;
+
+       if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
+               return;
 
        /* Check that we are the only device of that type */
        other = pci_get_device(dev->vendor, dev->device, NULL);
@@ -699,22 +703,25 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
        if (!res)
                return;
 
+       /*
+        * Allocate a 256GB window directly below the 0xfd00000000 hardware
+        * limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
+        */
        res->name = "PCI Bus 0000:00";
        res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
                IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
-       res->start = 0x100000000ull;
+       res->start = 0xbd00000000ull;
        res->end = 0xfd00000000ull - 1;
 
-       /* Just grab the free area behind system memory for this */
-       while ((conflict = request_resource_conflict(&iomem_resource, res))) {
-               if (conflict->end >= res->end) {
-                       kfree(res);
-                       return;
-               }
-               res->start = conflict->end + 1;
+       r = request_resource(&iomem_resource, res);
+       if (r) {
+               kfree(res);
+               return;
        }
 
-       dev_info(&dev->dev, "adding root bus resource %pR\n", res);
+       dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+                res);
+       add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 
        base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
                AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
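
A quick size check on the new fixed window, matching the 256GB claim in the comment:

	/*
	 * 0xfd00000000 - 0xbd00000000 = 0x4000000000
	 *                             = 1ULL << 38 = 256 GiB
	 */
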
index 6a151ce70e865caadde95c859855c4b63283ad4b..2dd15e967c3f5257c530d53c7d4f51f2e3165190 100644 (file)
@@ -135,7 +135,9 @@ pgd_t * __init efi_call_phys_prolog(void)
                                pud[j] = *pud_offset(p4d_k, vaddr);
                        }
                }
+               pgd_offset_k(pgd * PGDIR_SIZE)->pgd &= ~_PAGE_NX;
        }
+
 out:
        __flush_tlb_all();
 
@@ -196,6 +198,9 @@ static pgd_t *efi_pgd;
  * because we want to avoid inserting EFI region mappings (EFI_VA_END
  * to EFI_VA_START) into the standard kernel page tables. Everything
  * else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
  */
 int __init efi_alloc_page_tables(void)
 {
@@ -208,7 +213,7 @@ int __init efi_alloc_page_tables(void)
                return 0;
 
        gfp_mask = GFP_KERNEL | __GFP_ZERO;
-       efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+       efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
        if (!efi_pgd)
                return -ENOMEM;
 
index 8a99a2e96537a91db6574955aafeae1a5103ce02..5b513ccffde404adbd1ea2929226669f2a282825 100644 (file)
@@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
        /*
         * Update the first page pointer to skip over the CSH header.
         */
-       cap_info->pages[0] += csh->headersize;
+       cap_info->phys[0] += csh->headersize;
+
+       /*
+        * cap_info->capsule should point at a virtual mapping of the entire
+        * capsule, starting at the capsule header. Our image has the Quark
+        * security header prepended, so we cannot rely on the default vmap()
+        * mapping created by the generic capsule code.
+        * Given that the Quark firmware does not appear to care about the
+        * virtual mapping, let's just point cap_info->capsule at our copy
+        * of the capsule header.
+        */
+       cap_info->capsule = &cap_info->header;
 
        return 1;
 }
index dc036e511f48d3adc3261ca4d4214c1c6244dbe6..5a0483e7bf662cb27044b7684567b644dfe49b81 100644 (file)
@@ -60,7 +60,7 @@ static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
        return 0;
 }
 
-static const struct bt_sfi_data tng_bt_sfi_data __initdata = {
+static struct bt_sfi_data tng_bt_sfi_data __initdata = {
        .setup  = tng_bt_sfi_setup,
 };
 
index f44c0bc95aa2f45ad42462a5f23f4db4672d1257..8538a6723171a5606058a8823ed1cbb2d343fdb6 100644 (file)
@@ -299,7 +299,7 @@ static void bau_process_message(struct msg_desc *mdp, struct bau_control *bcp,
                local_flush_tlb();
                stat->d_alltlb++;
        } else {
-               __flush_tlb_one(msg->address);
+               __flush_tlb_single(msg->address);
                stat->d_onetlb++;
        }
        stat->d_requestee++;
index 5f6fd860820a3c5175609a5a3468262aac937604..e4cb9f4cde8ae224afe51ced1970805df293c956 100644 (file)
@@ -128,7 +128,7 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 static int uv_domain_activate(struct irq_domain *domain,
-                             struct irq_data *irq_data, bool early)
+                             struct irq_data *irq_data, bool reserve)
 {
        uv_program_mmr(irqd_cfg(irq_data), irq_data->chip_data);
        return 0;
index 36a28eddb435e72d2abc5ffbdd1e78a46b56876e..a7d966964c6f20577c927cf5e618bc86b3331977 100644 (file)
@@ -152,17 +152,19 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_rw(cpu);
        tss_desc tss;
 #endif
-       set_tss_desc(cpu, t);   /*
-                                * This just modifies memory; should not be
-                                * necessary. But... This is necessary, because
-                                * 386 hardware has concept of busy TSS or some
-                                * similar stupidity.
-                                */
+
+       /*
+        * We need to reload TR, which requires that we change the
+        * GDT entry to indicate "available" first.
+        *
+        * XXX: This could probably all be replaced by a call to
+        * force_reload_TR().
+        */
+       set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
 
 #ifdef CONFIG_X86_64
        memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
index d669e9d890017770456abe458f1161eb2509c09e..c9081c6671f0b7a05ecfaaf206e7e1ed2b1f456a 100644 (file)
@@ -1,8 +1,12 @@
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+#include <linux/bootmem.h>
+#endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
 
 #include <xen/features.h>
 #include <xen/page.h>
+#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -331,3 +335,80 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+void __init arch_xen_balloon_init(struct resource *hostmem_resource)
+{
+       struct xen_memory_map memmap;
+       int rc;
+       unsigned int i, last_guest_ram;
+       phys_addr_t max_addr = PFN_PHYS(max_pfn);
+       struct e820_table *xen_e820_table;
+       const struct e820_entry *entry;
+       struct resource *res;
+
+       if (!xen_initial_domain())
+               return;
+
+       xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
+       if (!xen_e820_table)
+               return;
+
+       memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
+       set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
+       rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
+       if (rc) {
+               pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
+               goto out;
+       }
+
+       last_guest_ram = 0;
+       for (i = 0; i < memmap.nr_entries; i++) {
+               if (xen_e820_table->entries[i].addr >= max_addr)
+                       break;
+               if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
+                       last_guest_ram = i;
+       }
+
+       entry = &xen_e820_table->entries[last_guest_ram];
+       if (max_addr >= entry->addr + entry->size)
+               goto out; /* No unallocated host RAM. */
+
+       hostmem_resource->start = max_addr;
+       hostmem_resource->end = entry->addr + entry->size;
+
+       /*
+        * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
+        * as unavailable. The rest of that region can be used for hotplug-based
+        * ballooning.
+        */
+       for (; i < memmap.nr_entries; i++) {
+               entry = &xen_e820_table->entries[i];
+
+               if (entry->type == E820_TYPE_RAM)
+                       continue;
+
+               if (entry->addr >= hostmem_resource->end)
+                       break;
+
+               res = kzalloc(sizeof(*res), GFP_KERNEL);
+               if (!res)
+                       goto out;
+
+               res->name = "Unavailable host RAM";
+               res->start = entry->addr;
+               res->end = (entry->addr + entry->size < hostmem_resource->end) ?
+                           entry->addr + entry->size : hostmem_resource->end;
+               rc = insert_resource(hostmem_resource, res);
+               if (rc) {
+                       pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
+                               __func__, res->start, res->end, rc);
+                       kfree(res);
+                       goto out;
+               }
+       }
+
+ out:
+       kfree(xen_e820_table);
+}
+#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
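
The net effect on the resource tree is easiest to see as a layout diagram (illustrative only; the addresses and hole contents are invented for the example):

	/*
	 * hostmem_resource: [max_addr .. end of last host RAM entry]
	 *   |-- "Unavailable host RAM"   e.g. a reserved E820 hole
	 *   |-- "Unavailable host RAM"   e.g. an MMIO region
	 *   `-- remaining gaps stay free for hotplug-based ballooning
	 */
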
index f2414c6c5e7c455b43fc45773fbd1264cf86c24e..c047f42552e1a61ed0a5787d904681974cc05af1 100644 (file)
@@ -88,6 +88,8 @@
 #include "multicalls.h"
 #include "pmu.h"
 
+#include "../kernel/cpu/cpu.h" /* get_cpu_cap() */
+
 void *xen_initial_gdt;
 
 static int xen_cpu_up_prepare_pv(unsigned int cpu);
@@ -826,7 +828,7 @@ static void xen_load_sp0(unsigned long sp0)
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
-       this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
+       this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
@@ -1258,6 +1260,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
 
        /* Work out if we support NX */
+       get_cpu_cap(&boot_cpu_data);
        x86_configure_nx();
 
        /* Get mfn list */
index fc048ec686e7699b263254c79b482ccf935c21ef..d85076223a696d0b00bee7c22a9e28b1e66dc975 100644 (file)
@@ -1325,20 +1325,18 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
        struct {
                struct mmuext_op op;
-#ifdef CONFIG_SMP
-               DECLARE_BITMAP(mask, num_processors);
-#else
                DECLARE_BITMAP(mask, NR_CPUS);
-#endif
        } *args;
        struct multicall_space mcs;
+       const size_t mc_entry_size = sizeof(args->op) +
+               sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
        trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
 
        if (cpumask_empty(cpus))
                return;         /* nothing to do */
 
-       mcs = xen_mc_entry(sizeof(*args));
+       mcs = xen_mc_entry(mc_entry_size);
        args = mcs.args;
        args->op.arg2.vcpumask = to_cpumask(args->mask);
 
@@ -1902,6 +1900,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
        /* Graft it onto L4[511][510] */
        copy_page(level2_kernel_pgt, l2);
 
+       /*
+        * Zap execute permission from the ident map. Due to the sharing of
+        * L1 entries we need to do this in the L2.
+        */
+       if (__supported_pte_mask & _PAGE_NX) {
+               for (i = 0; i < PTRS_PER_PMD; ++i) {
+                       if (pmd_none(level2_ident_pgt[i]))
+                               continue;
+                       level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
+               }
+       }
+
        /* Copy the initial P->M table mappings if necessary. */
        i = pgd_index(xen_start_info->mfn_list);
        if (i && i < pgd_index(__START_KERNEL_map))
@@ -2261,7 +2271,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
        switch (idx) {
        case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-       case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
        case FIX_WP_TEST:
 # ifdef CONFIG_HIGHMEM
@@ -2272,7 +2281,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
        case FIX_TEXT_POKE0:
        case FIX_TEXT_POKE1:
-       case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
                /* All local page mappings */
                pte = pfn_pte(phys, prot);
                break;
index c114ca767b3b8a382918e2b0160983fa257318db..6e0d2086eacbf37326467b5142e59750151a5328 100644 (file)
@@ -808,7 +808,6 @@ char * __init xen_memory_setup(void)
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
-               bool discard = false;
 
                chunk_size = size;
                type = xen_e820_table.entries[i].type;
@@ -824,11 +823,10 @@ char * __init xen_memory_setup(void)
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
-                               discard = true;
+                               type = E820_TYPE_UNUSABLE;
                }
 
-               if (!discard)
-                       xen_align_and_add_e820_region(addr, chunk_size, type);
+               xen_align_and_add_e820_region(addr, chunk_size, type);
 
                addr += chunk_size;
                size -= chunk_size;
index 75011b80660f6262206b6759e7b63a06a1d809ea..3b34745d0a52dd097d9c2d8955bd4afce1d24ee1 100644 (file)
@@ -72,7 +72,7 @@ u64 xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 void xen_save_time_memory_area(void);
 void xen_restore_time_memory_area(void);
-void __init xen_init_time_ops(void);
+void __ref xen_init_time_ops(void);
 void __init xen_hvm_init_time_ops(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
index 8bfdea58159ba9ffd972dd95717e0eee99101e0a..9ef6cf3addb38cae822d0e5c5ef18ba9e98cd2d7 100644 (file)
@@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
        bio->bi_disk = bio_src->bi_disk;
        bio->bi_partno = bio_src->bi_partno;
        bio_set_flag(bio, BIO_CLONED);
+       if (bio_flagged(bio_src, BIO_THROTTLED))
+               bio_set_flag(bio, BIO_THROTTLED);
        bio->bi_opf = bio_src->bi_opf;
        bio->bi_write_hint = bio_src->bi_write_hint;
        bio->bi_iter = bio_src->bi_iter;
index b8881750a3acd705789050c845c942673da999a8..3ba4326a63b59632fad81e686bf8229eee07320b 100644 (file)
@@ -562,6 +562,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
        }
 }
 
+void blk_drain_queue(struct request_queue *q)
+{
+       spin_lock_irq(q->queue_lock);
+       __blk_drain_queue(q, true);
+       spin_unlock_irq(q->queue_lock);
+}
+
 /**
  * blk_queue_bypass_start - enter queue bypass mode
  * @q: queue of interest
@@ -689,8 +696,6 @@ void blk_cleanup_queue(struct request_queue *q)
         */
        blk_freeze_queue(q);
        spin_lock_irq(lock);
-       if (!q->mq_ops)
-               __blk_drain_queue(q, true);
        queue_flag_set(QUEUE_FLAG_DEAD, q);
        spin_unlock_irq(lock);
 
index b21f8e86f1207f9b76bf3e2083fcf72b5062f0b7..d3a94719f03fb2af81d6270d6fc9ed58f0dde373 100644 (file)
 #include "blk.h"
 
 /*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
  */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
 {
-       blk_queue_bounce(rq->q, &bio);
+       struct bio *orig_bio = *bio;
+
+       blk_queue_bounce(rq->q, bio);
 
        if (!rq->bio) {
-               blk_rq_bio_prep(rq->q, rq, bio);
+               blk_rq_bio_prep(rq->q, rq, *bio);
        } else {
-               if (!ll_back_merge_fn(rq->q, rq, bio))
+               if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+                       if (orig_bio != *bio) {
+                               bio_put(*bio);
+                               *bio = orig_bio;
+                       }
                        return -EINVAL;
+               }
 
-               rq->biotail->bi_next = bio;
-               rq->biotail = bio;
-               rq->__data_len += bio->bi_iter.bi_size;
+               rq->biotail->bi_next = *bio;
+               rq->biotail = *bio;
+               rq->__data_len += (*bio)->bi_iter.bi_size;
        }
 
        return 0;
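
For callers outside this file, the new contract is that the bio pointer may be replaced by a bounce bio. A minimal caller sketch (hypothetical driver code, names invented):

	static int queue_payload_sketch(struct request *rq, struct bio *bio)
	{
		int ret;

		/* may substitute a bounce bio for 'bio' on highmem setups */
		ret = blk_rq_append_bio(rq, &bio);
		if (ret)
			return ret;	/* original bio already restored */

		/* from here on, use the possibly-updated 'bio' pointer */
		return 0;
	}
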
@@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
         * We link the bounce buffer in and could have to traverse it
         * later so we have to get a ref to prevent it from being freed
         */
-       ret = blk_rq_append_bio(rq, bio);
-       bio_get(bio);
+       ret = blk_rq_append_bio(rq, &bio);
        if (ret) {
-               bio_endio(bio);
                __blk_rq_unmap_user(orig_bio);
-               bio_put(bio);
                return ret;
        }
+       bio_get(bio);
 
        return 0;
 }
@@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        int do_copy = 0;
-       struct bio *bio;
+       struct bio *bio, *orig_bio;
        int ret;
 
        if (len > (queue_max_hw_sectors(q) << 9))
@@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        if (do_copy)
                rq->rq_flags |= RQF_COPY_USER;
 
-       ret = blk_rq_append_bio(rq, bio);
+       orig_bio = bio;
+       ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
-               bio_put(bio);
+               bio_put(orig_bio);
                return ret;
        }
 
index 11097477eeab6591088ca817d4690535e114e699..3d379732749175ece7ae39e427e115f51621c521 100644 (file)
@@ -161,6 +161,8 @@ void blk_freeze_queue(struct request_queue *q)
         * exported to drivers as the only user for unfreeze is blk_mq.
         */
        blk_freeze_queue_start(q);
+       if (!q->mq_ops)
+               blk_drain_queue(q);
        blk_mq_freeze_queue_wait(q);
 }
 
index 825bc29767e6699ac85675d319a9866b70cc9b84..d19f416d61012ac032c49608f0afe463c948e8bc 100644 (file)
@@ -2226,13 +2226,7 @@ again:
 out_unlock:
        spin_unlock_irq(q->queue_lock);
 out:
-       /*
-        * As multiple blk-throtls may stack in the same issue path, we
-        * don't want bios to leave with the flag set.  Clear the flag if
-        * being issued.
-        */
-       if (!throttled)
-               bio_clear_flag(bio, BIO_THROTTLED);
+       bio_set_flag(bio, BIO_THROTTLED);
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
        if (throttled || !td->track_bio_latency)
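
Together with the earlier clone hunk (__bio_clone_fast now copies BIO_THROTTLED), this inverts the old invariant: the flag now marks bios that have already been charged. A sketch of the resulting rule as a helper (illustrative, not from the patch):

	static bool already_throttled_sketch(struct bio *bio)
	{
		/* set by blk_throtl_bio() on issue and inherited by clones,
		 * so stacked throttling groups never charge bytes twice */
		return bio_flagged(bio, BIO_THROTTLED);
	}
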
index 3f1446937aece26f38ceb66cf4e3d159a23df871..442098aa9463a37dad0dfccb1718eea65be6cdb3 100644 (file)
@@ -330,4 +330,6 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
 }
 #endif /* CONFIG_BOUNCE */
 
+extern void blk_drain_queue(struct request_queue *q);
+
 #endif /* BLK_INTERNAL_H */
index fceb1a96480bfb9600e4664fa2b4992c8bb64210..1d05c422c932ad56d705f94deed6cce0891ff9d3 100644 (file)
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
+       bool passthrough = bio_is_passthrough(*bio_orig);
 
        bio_for_each_segment(from, *bio_orig, iter) {
                if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!bounce)
                return;
 
-       if (sectors < bio_sectors(*bio_orig)) {
+       if (!passthrough && sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
                bio_chain(bio, *bio_orig);
                generic_make_request(*bio_orig);
                *bio_orig = bio;
        }
-       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+       bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+                       bounce_bio_set);
 
        bio_for_each_segment_all(to, bio, i) {
                struct page *page = to->bv_page;
index b4df317c291692f01138b91608dc6c80f71bb9aa..f95c60774ce8ca613417d3ccf54bee52010752ee 100644 (file)
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
        unsigned int cur_domain;
        unsigned int batching;
        wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+       struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
+                            void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
        unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
+               init_waitqueue_func_entry(&khd->domain_wait[i],
+                                         kyber_domain_wake);
+               khd->domain_wait[i].private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].entry);
                atomic_set(&khd->wait_index[i], 0);
        }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
        int nr;
 
        nr = __sbitmap_queue_get(domain_tokens);
-       if (nr >= 0)
-               return nr;
 
        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
-       if (list_empty_careful(&wait->entry)) {
-               init_waitqueue_func_entry(wait, kyber_domain_wake);
-               wait->private = hctx;
+       if (nr < 0 && list_empty_careful(&wait->entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
+               khd->domain_ws[sched_domain] = ws;
                add_wait_queue(&ws->wait, wait);
 
                /*
                 * Try again in case a token was freed before we got on the wait
-                * queue. The waker may have already removed the entry from the
-                * wait queue, but list_del_init() is okay with that.
+                * queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
-               if (nr >= 0) {
-                       unsigned long flags;
+       }
 
-                       spin_lock_irqsave(&ws->wait.lock, flags);
-                       list_del_init(&wait->entry);
-                       spin_unlock_irqrestore(&ws->wait.lock, flags);
-               }
+       /*
+        * If we got a token while we were on the wait queue, remove ourselves
+        * from the wait queue to ensure that all wake ups make forward
+        * progress. It's possible that the waker already deleted the entry
+        * between the !list_empty_careful() check and us grabbing the lock, but
+        * list_del_init() is okay with that.
+        */
+       if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+               ws = khd->domain_ws[sched_domain];
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
        }
+
        return nr;
 }
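
kyber_domain_wake() itself is only forward-declared above; a plausible sketch of the waker this code must cooperate with (an assumption about its body, not quoted from the patch):

	static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode,
				     int flags, void *key)
	{
		struct blk_mq_hw_ctx *hctx = READ_ONCE(wait->private);

		/* detach first, then rerun the hardware queue */
		list_del_init(&wait->entry);
		blk_mq_run_hw_queue(hctx, true);
		return 1;
	}
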
 
index 415a54ced4d6a490ae1e09170c8b80ef3eef135e..35d4dcea381fbee7aa312667aa235456bb9d7918 100644 (file)
@@ -664,7 +664,7 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
        unsigned int i;
 
        list_for_each_entry_safe(rsgl, tmp, &areq->rsgl_list, list) {
-               ctx->rcvused -= rsgl->sg_num_bytes;
+               atomic_sub(rsgl->sg_num_bytes, &ctx->rcvused);
                af_alg_free_sg(&rsgl->sgl);
                list_del(&rsgl->list);
                if (rsgl != &areq->first_rsgl)
@@ -1138,12 +1138,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                if (!af_alg_readable(sk))
                        break;
 
-               if (!ctx->used) {
-                       err = af_alg_wait_for_data(sk, flags);
-                       if (err)
-                               return err;
-               }
-
                seglen = min_t(size_t, (maxsize - len),
                               msg_data_left(msg));
 
@@ -1169,7 +1163,7 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 
                areq->last_rsgl = rsgl;
                len += err;
-               ctx->rcvused += err;
+               atomic_add(err, &ctx->rcvused);
                rsgl->sg_num_bytes = err;
                iov_iter_advance(&msg->msg_iter, err);
        }
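
With rcvused now an atomic_t, the receive-buffer headroom can be read without taking the socket lock. A sketch along the lines of af_alg_rcvbuf() (assumed shape, simplified):

	static inline int rcvbuf_headroom_sketch(struct sock *sk,
						 struct af_alg_ctx *ctx)
	{
		/* lockless read is now safe against concurrent add/sub */
		return max_t(int, sk->sk_rcvbuf - atomic_read(&ctx->rcvused), 0);
	}
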
index 60d7366ed343e9ad6a76b9dd36be22fbd7543c89..9a636f961572b99af844b1d8a28bebfd9463c9e2 100644 (file)
@@ -167,6 +167,18 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 
                        spawn->alg = NULL;
                        spawns = &inst->alg.cra_users;
+
+                       /*
+                        * We may encounter an unregistered instance here, since
+                        * an instance's spawns are set up prior to the instance
+                        * being registered.  An unregistered instance will have
+                        * NULL ->cra_users.next, since ->cra_users isn't
+                        * properly initialized until registration.  But an
+                        * unregistered instance cannot have any users, so treat
+                        * it the same as ->cra_users being empty.
+                        */
+                       if (spawns->next == NULL)
+                               break;
                }
        } while ((spawns = crypto_more_spawns(alg, &stack, &top,
                                              &secondary_spawns)));
index 48b34e9c68342c55610ad83900557dc1c785af41..e9885a35ef6e2372a393cf3f0445924f9f044788 100644 (file)
@@ -111,6 +111,12 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
        size_t usedpages = 0;           /* [in]  RX bufs to be used from user */
        size_t processed = 0;           /* [in]  TX bufs to be consumed */
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /*
         * Data length provided by caller via sendmsg/sendpage that has not
         * yet been processed.
@@ -285,6 +291,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = outlen;
+
                aead_request_set_callback(&areq->cra_u.aead_req,
                                          CRYPTO_TFM_REQ_MAY_BACKLOG,
                                          af_alg_async_cb, areq);
@@ -292,12 +302,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
                                 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = outlen;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
@@ -565,7 +571,7 @@ static int aead_accept_parent_nokey(void *private, struct sock *sk)
        INIT_LIST_HEAD(&ctx->tsgl_list);
        ctx->len = len;
        ctx->used = 0;
-       ctx->rcvused = 0;
+       atomic_set(&ctx->rcvused, 0);
        ctx->more = 0;
        ctx->merge = 0;
        ctx->enc = 0;
index 30cff827dd8fff048fa3e2ca7de770ab73022749..c5c47b680152034581957dc13ac1884f0c69f546 100644 (file)
@@ -72,6 +72,12 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
        int err = 0;
        size_t len = 0;
 
+       if (!ctx->used) {
+               err = af_alg_wait_for_data(sk, flags);
+               if (err)
+                       return err;
+       }
+
        /* Allocate cipher request for current operation. */
        areq = af_alg_alloc_areq(sk, sizeof(struct af_alg_async_req) +
                                     crypto_skcipher_reqsize(tfm));
@@ -119,6 +125,10 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                /* AIO operation */
                sock_hold(sk);
                areq->iocb = msg->msg_iocb;
+
+               /* Remember output size that will be generated. */
+               areq->outlen = len;
+
                skcipher_request_set_callback(&areq->cra_u.skcipher_req,
                                              CRYPTO_TFM_REQ_MAY_SLEEP,
                                              af_alg_async_cb, areq);
@@ -127,12 +137,8 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
                        crypto_skcipher_decrypt(&areq->cra_u.skcipher_req);
 
                /* AIO operation in progress */
-               if (err == -EINPROGRESS || err == -EBUSY) {
-                       /* Remember output size that will be generated. */
-                       areq->outlen = len;
-
+               if (err == -EINPROGRESS || err == -EBUSY)
                        return -EIOCBQUEUED;
-               }
 
                sock_put(sk);
        } else {
@@ -384,7 +390,7 @@ static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
        INIT_LIST_HEAD(&ctx->tsgl_list);
        ctx->len = len;
        ctx->used = 0;
-       ctx->rcvused = 0;
+       atomic_set(&ctx->rcvused, 0);
        ctx->more = 0;
        ctx->merge = 0;
        ctx->enc = 0;
index db1bc3147bc4708b5a4e16c36844e5452101bf64..600afa99941fe07470b74f3c142b59716656df92 100644 (file)
@@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
                                                    algt->mask));
        if (IS_ERR(poly))
                return PTR_ERR(poly);
+       poly_hash = __crypto_hash_alg_common(poly);
+
+       err = -EINVAL;
+       if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+               goto out_put_poly;
 
        err = -ENOMEM;
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
 
        ctx = aead_instance_ctx(inst);
        ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-       poly_hash = __crypto_hash_alg_common(poly);
        err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
                                      aead_crypto_instance(inst));
        if (err)
index 4e64726588524f137acd590809bef11673695ed2..eca04d3729b37c696c2dac4b0ac472422f30615d 100644 (file)
@@ -81,6 +81,7 @@ static int mcryptd_init_queue(struct mcryptd_queue *queue,
                pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
                crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
                INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
+               spin_lock_init(&cpu_queue->q_lock);
        }
        return 0;
 }
@@ -104,15 +105,16 @@ static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
        int cpu, err;
        struct mcryptd_cpu_queue *cpu_queue;
 
-       cpu = get_cpu();
-       cpu_queue = this_cpu_ptr(queue->cpu_queue);
-       rctx->tag.cpu = cpu;
+       cpu_queue = raw_cpu_ptr(queue->cpu_queue);
+       spin_lock(&cpu_queue->q_lock);
+       cpu = smp_processor_id();
+       rctx->tag.cpu = cpu;
 
        err = crypto_enqueue_request(&cpu_queue->queue, request);
        pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
                 cpu, cpu_queue, request);
+       spin_unlock(&cpu_queue->q_lock);
        queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
-       put_cpu();
 
        return err;
 }
@@ -161,16 +163,11 @@ static void mcryptd_queue_worker(struct work_struct *work)
        cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
        i = 0;
        while (i < MCRYPTD_BATCH || single_task_running()) {
-               /*
-                * preempt_disable/enable is used to prevent
-                * being preempted by mcryptd_enqueue_request()
-                */
-               local_bh_disable();
-               preempt_disable();
+               spin_lock_bh(&cpu_queue->q_lock);
                backlog = crypto_get_backlog(&cpu_queue->queue);
                req = crypto_dequeue_request(&cpu_queue->queue);
-               preempt_enable();
-               local_bh_enable();
+               spin_unlock_bh(&cpu_queue->q_lock);
 
                if (!req) {
                        mcryptd_opportunistic_flush();
@@ -185,7 +182,7 @@ static void mcryptd_queue_worker(struct work_struct *work)
                ++i;
        }
        if (cpu_queue->queue.qlen)
-               queue_work(kcrypto_wq, &cpu_queue->work);
+               queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
 }
 
 void mcryptd_flusher(struct work_struct *__work)
index ee9cfb99fe256af06ae7ad5d946c3b76d90de1e9..f8ec3d4ba4a80f8eefed739d9e8a852865a7ac02 100644 (file)
@@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
        crypto_free_aead(ctx->child);
 }
 
+static void pcrypt_free(struct aead_instance *inst)
+{
+       struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+       crypto_drop_aead(&ctx->spawn);
+       kfree(inst);
+}
+
 static int pcrypt_init_instance(struct crypto_instance *inst,
                                struct crypto_alg *alg)
 {
@@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
        inst->alg.encrypt = pcrypt_aead_encrypt;
        inst->alg.decrypt = pcrypt_aead_decrypt;
 
+       inst->free = pcrypt_free;
+
        err = aead_register_instance(tmpl, inst);
        if (err)
                goto out_drop_aead;
@@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
        return -EINVAL;
 }
 
-static void pcrypt_free(struct crypto_instance *inst)
-{
-       struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-       crypto_drop_aead(&ctx->spawn);
-       kfree(inst);
-}
-
 static int pcrypt_cpumask_change_notify(struct notifier_block *self,
                                        unsigned long val, void *data)
 {
@@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
 static struct crypto_template pcrypt_tmpl = {
        .name = "pcrypt",
        .create = pcrypt_create,
-       .free = pcrypt_free,
        .module = THIS_MODULE,
 };
 
index 778e0ff42bfa801eda5be848da9e6747ebbc2626..11af5fd6a443570550e1dac5b0a429b2cae801b1 100644 (file)
@@ -449,6 +449,8 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
 
        walk->total = req->cryptlen;
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -456,9 +458,6 @@ static int skcipher_walk_skcipher(struct skcipher_walk *walk,
        scatterwalk_start(&walk->in, req->src);
        scatterwalk_start(&walk->out, req->dst);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        walk->flags &= ~SKCIPHER_WALK_SLEEP;
        walk->flags |= req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
                       SKCIPHER_WALK_SLEEP : 0;
@@ -510,6 +509,8 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        int err;
 
        walk->nbytes = 0;
+       walk->iv = req->iv;
+       walk->oiv = req->iv;
 
        if (unlikely(!walk->total))
                return 0;
@@ -525,9 +526,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk,
        scatterwalk_done(&walk->in, 0, walk->total);
        scatterwalk_done(&walk->out, 0, walk->total);
 
-       walk->iv = req->iv;
-       walk->oiv = req->iv;
-
        if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
                walk->flags |= SKCIPHER_WALK_SLEEP;
        else
index 6742f6c68034c5e833505d294902dd97c274c1b0..9bff853e85f37831d8d053a2aa363f139537c9b5 100644 (file)
@@ -1007,7 +1007,7 @@ skip:
        /* The record may be cleared by others, try read next record */
        if (len == -ENOENT)
                goto skip;
-       else if (len < sizeof(*rcd)) {
+       else if (len < 0 || len < sizeof(*rcd)) {
                rc = -EIO;
                goto out;
        }
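
The added len < 0 test matters because of C's usual arithmetic conversions: sizeof() yields size_t, so comparing a negative ssize_t against it converts the negative value to a huge unsigned one and the bound check passes. A small standalone demonstration (userspace C, illustrative only):

	#include <stdio.h>

	int main(void)
	{
		long len = -5;			/* stand-in for an error return */

		if (len < sizeof(long))		/* len converted to unsigned! */
			puts("bound check caught it");
		else
			puts("negative len slipped through");	/* this prints */
		return 0;
	}
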
index 30e84cc600ae6438c25aec2f2975ae4e3f144553..06ea4749ebd9826a3d7b8b0a9798a1cc797f4d61 100644 (file)
@@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
        struct cpc_register_resource *desired_reg;
        int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
-       struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
+       struct cppc_pcc_data *pcc_ss_data;
        int ret = 0;
 
        if (!cpc_desc || pcc_ss_id < 0) {
index ff2580e7611d18c6d56c58d50c2cbc3a2d54aa36..abeb4df4f22e43d7f0d1398af9962135a37af4b6 100644 (file)
@@ -1670,6 +1670,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                                dev_name(&adev_dimm->dev));
                return -ENXIO;
        }
+       /*
+        * Record nfit_mem for the notification path to track back to
+        * the nfit sysfs attributes for this dimm device object.
+        */
+       dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
        /*
         * Until standardization materializes we need to consider 4
@@ -1752,9 +1757,11 @@ static void shutdown_dimm_notify(void *data)
                        sysfs_put(nfit_mem->flags_attr);
                        nfit_mem->flags_attr = NULL;
                }
-               if (adev_dimm)
+               if (adev_dimm) {
                        acpi_remove_notify_handler(adev_dimm->handle,
                                        ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+                       dev_set_drvdata(&adev_dimm->dev, NULL);
+               }
        }
        mutex_unlock(&acpi_desc->init_mutex);
 }
index bccec9de05330b2fe6822369e5c7a409e8759e95..a7ecfde66b7b34f44cc9cffeac0be1b703e29aff 100644 (file)
@@ -482,7 +482,8 @@ enum binder_deferred_state {
  * @tsk                   task_struct for group_leader of process
  *                        (invariant after initialized)
  * @files                 files_struct for process
- *                        (invariant after initialized)
+ *                        (protected by @files_lock)
+ * @files_lock            mutex to protect @files
  * @deferred_work_node:   element for binder_deferred_list
  *                        (protected by binder_deferred_lock)
  * @deferred_work:        bitmap of deferred work to perform
@@ -530,6 +531,7 @@ struct binder_proc {
        int pid;
        struct task_struct *tsk;
        struct files_struct *files;
+       struct mutex files_lock;
        struct hlist_node deferred_work_node;
        int deferred_work;
        bool is_dead;
@@ -877,20 +879,26 @@ static void binder_inc_node_tmpref_ilocked(struct binder_node *node);
 
 static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 {
-       struct files_struct *files = proc->files;
        unsigned long rlim_cur;
        unsigned long irqs;
+       int ret;
 
-       if (files == NULL)
-               return -ESRCH;
-
-       if (!lock_task_sighand(proc->tsk, &irqs))
-               return -EMFILE;
-
+       mutex_lock(&proc->files_lock);
+       if (proc->files == NULL) {
+               ret = -ESRCH;
+               goto err;
+       }
+       if (!lock_task_sighand(proc->tsk, &irqs)) {
+               ret = -EMFILE;
+               goto err;
+       }
        rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
        unlock_task_sighand(proc->tsk, &irqs);
 
-       return __alloc_fd(files, 0, rlim_cur, flags);
+       ret = __alloc_fd(proc->files, 0, rlim_cur, flags);
+err:
+       mutex_unlock(&proc->files_lock);
+       return ret;
 }
 
 /*
@@ -899,8 +907,10 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
 static void task_fd_install(
        struct binder_proc *proc, unsigned int fd, struct file *file)
 {
+       mutex_lock(&proc->files_lock);
        if (proc->files)
                __fd_install(proc->files, fd, file);
+       mutex_unlock(&proc->files_lock);
 }
 
 /*
@@ -910,9 +920,11 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
 {
        int retval;
 
-       if (proc->files == NULL)
-               return -ESRCH;
-
+       mutex_lock(&proc->files_lock);
+       if (proc->files == NULL) {
+               retval = -ESRCH;
+               goto err;
+       }
        retval = __close_fd(proc->files, fd);
        /* can't restart close syscall because file table entry was cleared */
        if (unlikely(retval == -ERESTARTSYS ||
@@ -920,7 +932,8 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
                     retval == -ERESTARTNOHAND ||
                     retval == -ERESTART_RESTARTBLOCK))
                retval = -EINTR;
-
+err:
+       mutex_unlock(&proc->files_lock);
        return retval;
 }
 
@@ -4627,7 +4640,9 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
        ret = binder_alloc_mmap_handler(&proc->alloc, vma);
        if (ret)
                return ret;
+       mutex_lock(&proc->files_lock);
        proc->files = get_files_struct(current);
+       mutex_unlock(&proc->files_lock);
        return 0;
 
 err_bad_arg:
@@ -4651,6 +4666,7 @@ static int binder_open(struct inode *nodp, struct file *filp)
        spin_lock_init(&proc->outer_lock);
        get_task_struct(current->group_leader);
        proc->tsk = current->group_leader;
+       mutex_init(&proc->files_lock);
        INIT_LIST_HEAD(&proc->todo);
        proc->default_priority = task_nice(current);
        binder_dev = container_of(filp->private_data, struct binder_device,
@@ -4903,9 +4919,11 @@ static void binder_deferred_func(struct work_struct *work)
 
                files = NULL;
                if (defer & BINDER_DEFERRED_PUT_FILES) {
+                       mutex_lock(&proc->files_lock);
                        files = proc->files;
                        if (files)
                                proc->files = NULL;
+                       mutex_unlock(&proc->files_lock);
                }
 
                if (defer & BINDER_DEFERRED_FLUSH)
index bdc87907d6a1b297276e9c4acbb89175f69d0d08..2415ad9f6dd4f14fdf7132387e9c9fb2c4b1df26 100644 (file)
@@ -236,6 +236,9 @@ config GENERIC_CPU_DEVICES
 config GENERIC_CPU_AUTOPROBE
        bool
 
+config GENERIC_CPU_VULNERABILITIES
+       bool
+
 config SOC_BUS
        bool
        select GLOB
index eb3af2739537a8def39259206e2345a2adc72c71..07532d83be0bca7d06aa5e99670ac920435e0ed9 100644 (file)
@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
                this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
 }
 
+static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+{
+       return of_property_read_bool(this_leaf->of_node, "cache-unified");
+}
+
 static void cache_of_override_properties(unsigned int cpu)
 {
        int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)
 
        for (index = 0; index < cache_leaves(cpu); index++) {
                this_leaf = this_cpu_ci->info_list + index;
+               /*
+                * init_cache_level must set up the cache level correctly,
+                * overriding the architecturally specified levels, so
+                * if type is NONE at this stage, it should be unified
+                */
+               if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+                   cache_node_is_unified(this_leaf))
+                       this_leaf->type = CACHE_TYPE_UNIFIED;
                cache_size(this_leaf);
                cache_get_line_size(this_leaf);
                cache_nr_sets(this_leaf);
index 58a9b608d8216a67f13d6c514d92290dc0a27de3..d99038487a0d2fab5164f8df3f1e0f7e58ea8ea3 100644 (file)
@@ -511,10 +511,58 @@ static void __init cpu_dev_register_generic(void)
 #endif
 }
 
+#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
+
+ssize_t __weak cpu_show_meltdown(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v1(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_spectre_v2(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       return sprintf(buf, "Not affected\n");
+}
+
+static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
+static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
+static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
+
+static struct attribute *cpu_root_vulnerabilities_attrs[] = {
+       &dev_attr_meltdown.attr,
+       &dev_attr_spectre_v1.attr,
+       &dev_attr_spectre_v2.attr,
+       NULL
+};
+
+static const struct attribute_group cpu_root_vulnerabilities_group = {
+       .name  = "vulnerabilities",
+       .attrs = cpu_root_vulnerabilities_attrs,
+};
+
+static void __init cpu_register_vulnerabilities(void)
+{
+       if (sysfs_create_group(&cpu_subsys.dev_root->kobj,
+                              &cpu_root_vulnerabilities_group))
+               pr_err("Unable to register CPU vulnerabilities\n");
+}
+
+#else
+static inline void cpu_register_vulnerabilities(void) { }
+#endif
+
 void __init cpu_dev_init(void)
 {
        if (subsys_system_register(&cpu_subsys, cpu_root_attr_groups))
                panic("Failed to register CPU subsystem");
 
        cpu_dev_register_generic();
+       cpu_register_vulnerabilities();
 }
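
Once registered, the group appears under sysfs; on an architecture that does not override the weak handlers the files report the default string, e.g. (illustrative session):

	$ cat /sys/devices/system/cpu/vulnerabilities/meltdown
	Not affected
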
index bc8e61506968a0c3da43f2e4396f0cc9550b0d51..d5fe720cf14940b668f8764de2bad6cf95549528 100644 (file)
@@ -1581,9 +1581,8 @@ out:
        return err;
 }
 
-static void lo_release(struct gendisk *disk, fmode_t mode)
+static void __lo_release(struct loop_device *lo)
 {
-       struct loop_device *lo = disk->private_data;
        int err;
 
        if (atomic_dec_return(&lo->lo_refcnt))
@@ -1610,6 +1609,13 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
        mutex_unlock(&lo->lo_ctl_mutex);
 }
 
+static void lo_release(struct gendisk *disk, fmode_t mode)
+{
+       mutex_lock(&loop_index_mutex);
+       __lo_release(disk->private_data);
+       mutex_unlock(&loop_index_mutex);
+}
+
 static const struct block_device_operations lo_fops = {
        .owner =        THIS_MODULE,
        .open =         lo_open,
index ccb9975a97fa3f214d658776450ab618bae26643..ad0477ae820f040affe54f4368d3a02d9da63350 100644 (file)
@@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
 struct nullb_cmd {
        struct list_head list;
        struct llist_node ll_list;
-       call_single_data_t csd;
+       struct __call_single_data csd;
        struct request *rq;
        struct bio *bio;
        unsigned int tag;
+       blk_status_t error;
        struct nullb_queue *nq;
        struct hrtimer timer;
-       blk_status_t error;
 };
 
 struct nullb_queue {
index 38fc5f397fdede04ebaf4f3f9c2e89118ee6b235..cc93522a6d419dc77902bab085306c2b316f6c16 100644 (file)
@@ -3047,13 +3047,21 @@ static void format_lock_cookie(struct rbd_device *rbd_dev, char *buf)
        mutex_unlock(&rbd_dev->watch_mutex);
 }
 
+static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie)
+{
+       struct rbd_client_id cid = rbd_get_cid(rbd_dev);
+
+       strcpy(rbd_dev->lock_cookie, cookie);
+       rbd_set_owner_cid(rbd_dev, &cid);
+       queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+}
+
 /*
  * lock_rwsem must be held for write
  */
 static int rbd_lock(struct rbd_device *rbd_dev)
 {
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
-       struct rbd_client_id cid = rbd_get_cid(rbd_dev);
        char cookie[32];
        int ret;
 
@@ -3068,9 +3076,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
                return ret;
 
        rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED;
-       strcpy(rbd_dev->lock_cookie, cookie);
-       rbd_set_owner_cid(rbd_dev, &cid);
-       queue_work(rbd_dev->task_wq, &rbd_dev->acquired_lock_work);
+       __rbd_lock(rbd_dev, cookie);
        return 0;
 }
 
@@ -3856,7 +3862,7 @@ static void rbd_reacquire_lock(struct rbd_device *rbd_dev)
                        queue_delayed_work(rbd_dev->task_wq,
                                           &rbd_dev->lock_dwork, 0);
        } else {
-               strcpy(rbd_dev->lock_cookie, cookie);
+               __rbd_lock(rbd_dev, cookie);
        }
 }
 
@@ -4381,7 +4387,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
        segment_size = rbd_obj_bytes(&rbd_dev->header);
        blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
        q->limits.max_sectors = queue_max_hw_sectors(q);
-       blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
+       blk_queue_max_segments(q, USHRT_MAX);
        blk_queue_max_segment_size(q, segment_size);
        blk_queue_io_min(q, segment_size);
        blk_queue_io_opt(q, segment_size);
index 328ca93781cf2691fb692b64e86ec4c203ca5b47..1b76d958590275daa6b9c2f8c69ecd2d51655da7 100644 (file)
@@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = {
        .match          = sunxi_rsb_device_match,
        .probe          = sunxi_rsb_device_probe,
        .remove         = sunxi_rsb_device_remove,
+       .uevent         = of_device_uevent_modalias,
 };
 
 static void sunxi_rsb_dev_release(struct device *dev)
index 647d056df88c8dd2a7d8288e35fa2eeba9b7705b..b56c11f51bafad32bafc7b5b9c7467e7cd16632b 100644 (file)
@@ -220,7 +220,8 @@ static bool clk_core_is_enabled(struct clk_core *core)
 
        ret = core->ops->is_enabled(core->hw);
 done:
-       clk_pm_runtime_put(core);
+       if (core->dev)
+               pm_runtime_put(core->dev);
 
        return ret;
 }
@@ -1564,6 +1565,9 @@ static void clk_change_rate(struct clk_core *core)
                best_parent_rate = core->parent->rate;
        }
 
+       if (clk_pm_runtime_get(core))
+               return;
+
        if (core->flags & CLK_SET_RATE_UNGATE) {
                unsigned long flags;
 
@@ -1634,6 +1638,8 @@ static void clk_change_rate(struct clk_core *core)
        /* handle the new child which might not be in core->children yet */
        if (core->new_child)
                clk_change_rate(core->new_child);
+
+       clk_pm_runtime_put(core);
 }
 
 static int clk_core_set_rate_nolock(struct clk_core *core,
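
For readers outside the clk core: the hunks above move the runtime-PM handling so the whole rate-change walk is bracketed by a get/put pair, bailing out early if the get fails. A minimal standalone sketch of that bracketing pattern (not kernel code; the pm_runtime helpers are stubbed as a hypothetical usage counter):

/* Standalone sketch: get/put bracketing with an early return when the
 * get fails, mirroring clk_change_rate() above. */
#include <stdio.h>

static int usage_count;

static int pm_runtime_get_stub(void)	/* hypothetical stand-in */
{
	usage_count++;
	return 0;			/* 0 on success, like the kernel helper */
}

static void pm_runtime_put_stub(void)
{
	usage_count--;
}

static void change_rate(void)
{
	if (pm_runtime_get_stub())	/* device must be powered for register I/O */
		return;			/* nothing to undo: the get failed */

	printf("reprogramming dividers with the device powered\n");

	pm_runtime_put_stub();		/* drop the reference on every exit path */
}

int main(void)
{
	change_rate();
	printf("usage_count = %d\n", usage_count);	/* balanced: 0 */
	return 0;
}
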
index a1a634253d6f2299bfad888b2fa193c98b4ac019..f00d8758ba24f6e5ed537a88be76ff85e5a7c5e4 100644 (file)
@@ -16,6 +16,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,9 +84,20 @@ static int sun9i_mmc_reset_deassert(struct reset_controller_dev *rcdev,
        return 0;
 }
 
+static int sun9i_mmc_reset_reset(struct reset_controller_dev *rcdev,
+                                unsigned long id)
+{
+       sun9i_mmc_reset_assert(rcdev, id);
+       udelay(10);
+       sun9i_mmc_reset_deassert(rcdev, id);
+
+       return 0;
+}
+
 static const struct reset_control_ops sun9i_mmc_reset_ops = {
        .assert         = sun9i_mmc_reset_assert,
        .deassert       = sun9i_mmc_reset_deassert,
+       .reset          = sun9i_mmc_reset_reset,
 };
 
 static int sun9i_a80_mmc_config_clk_probe(struct platform_device *pdev)
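
The new .reset callback above gives the controller a complete assert-delay-deassert pulse, so consumers that call reset_control_reset() (rather than driving assert/deassert themselves) now work. A standalone sketch of the pulse pattern over a fake register, with the 10µs settle time taken from the patch:

/* Standalone sketch (not kernel code) of an assert/deassert reset pulse. */
#include <stdio.h>
#include <unistd.h>

static unsigned int reset_reg;	/* fake register: bit set = line deasserted */

static void reset_assert(unsigned long id)
{
	reset_reg &= ~(1u << id);
}

static void reset_deassert(unsigned long id)
{
	reset_reg |= 1u << id;
}

static void reset_pulse(unsigned long id)
{
	reset_assert(id);
	usleep(10);		/* let the reset propagate, as in the patch */
	reset_deassert(id);
}

int main(void)
{
	reset_pulse(2);
	printf("reg = %#x\n", reset_reg);	/* bit 2 set: line released */
	return 0;
}
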
index 58d4f4e1ad6a907991873a03027e6c7aa2f31fc4..ca38229b045ab288a2f250dddaf1b174e8c0572f 100644 (file)
@@ -22,6 +22,8 @@
 
 #include "cpufreq_governor.h"
 
+#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL      (2 * TICK_NSEC / NSEC_PER_USEC)
+
 static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
 
 static DEFINE_MUTEX(gov_dbs_data_mutex);
@@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
 {
        struct dbs_data *dbs_data = to_dbs_data(attr_set);
        struct policy_dbs_info *policy_dbs;
+       unsigned int sampling_interval;
        int ret;
-       ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
-       if (ret != 1)
+
+       ret = sscanf(buf, "%u", &sampling_interval);
+       if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
                return -EINVAL;
 
+       dbs_data->sampling_rate = sampling_interval;
+
        /*
         * We are operating under dbs_data->mutex and so the list and its
         * entries can't be freed concurrently.
@@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
        if (ret)
                goto free_policy_dbs_info;
 
-       dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
+       /*
+        * The sampling interval should not be less than the transition latency
+        * of the CPU and it also cannot be too small for dbs_update() to work
+        * correctly.
+        */
+       dbs_data->sampling_rate = max_t(unsigned int,
+                                       CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
+                                       cpufreq_policy_transition_delay_us(policy));
 
        if (!have_governor_per_policy())
                gov->gdbs_data = dbs_data;
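
Two behaviours are added above: sysfs writes below the floor are rejected with -EINVAL, while the initial rate derived from the transition delay is silently clamped up. A sketch of both paths; the two-tick floor is computed here for an assumed HZ=250, the real value depends on the kernel's tick configuration:

/* Standalone sketch (not kernel code) of the two clamping behaviours. */
#include <stdio.h>

#define EINVAL			22
#define HZ			250			/* assumed tick rate */
#define MIN_SAMPLING_US		(2 * 1000000 / HZ)	/* two ticks, in us */

static unsigned int sampling_rate;

/* sysfs write path: reject values below the floor */
static int store_sampling_rate(unsigned int val)
{
	if (val < MIN_SAMPLING_US)
		return -EINVAL;
	sampling_rate = val;
	return 0;
}

/* init path: clamp the transition delay up to the floor */
static void init_sampling_rate(unsigned int transition_delay_us)
{
	sampling_rate = transition_delay_us > MIN_SAMPLING_US ?
			transition_delay_us : MIN_SAMPLING_US;
}

int main(void)
{
	init_sampling_rate(500);			/* too small: clamped */
	printf("init -> %u us\n", sampling_rate);
	printf("store(100) -> %d\n", store_sampling_rate(100));	/* -EINVAL */
	printf("store(20000) -> %d\n", store_sampling_rate(20000));
	return 0;
}
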
index 628fe899cb483da9dbf0f7661b537734bc82f784..d9b2c2de49c43f125c91b382f818ff81d0ffc6ac 100644 (file)
@@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
        val >>= OCOTP_CFG3_SPEED_SHIFT;
        val &= 0x3;
 
-       if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
-            of_machine_is_compatible("fsl,imx6q"))
-               if (dev_pm_opp_disable(dev, 1200000000))
-                       dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        if (val < OCOTP_CFG3_SPEED_996MHZ)
                if (dev_pm_opp_disable(dev, 996000000))
                        dev_warn(dev, "failed to disable 996MHz OPP\n");
-       if (of_machine_is_compatible("fsl,imx6q")) {
+
+       if (of_machine_is_compatible("fsl,imx6q") ||
+           of_machine_is_compatible("fsl,imx6qp")) {
                if (val != OCOTP_CFG3_SPEED_852MHZ)
                        if (dev_pm_opp_disable(dev, 852000000))
                                dev_warn(dev, "failed to disable 852MHz OPP\n");
+               if (val != OCOTP_CFG3_SPEED_1P2GHZ)
+                       if (dev_pm_opp_disable(dev, 1200000000))
+                               dev_warn(dev, "failed to disable 1.2GHz OPP\n");
        }
        iounmap(base);
 put_node:
index 3e104f5aa0c2f9e05a3d7abd22dd6045ed420e9c..b56b3f711d9410a9ebddaa821428fd646ad02417 100644 (file)
@@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO
        select CRYPTO_SHA256
        select CRYPTO_SHA512
        select CRYPTO_AUTHENC
+       select CRYPTO_GF128MUL
        ---help---
          The Chelsio Crypto Co-processor driver for T6 adapters.
 
index 89ba9e85c0f377577c3e119bac5e7157372faed6..4bcef78a08aad241675e50d959aff2a6c58e126b 100644 (file)
@@ -607,6 +607,7 @@ static inline void safexcel_handle_result_descriptor(struct safexcel_crypto_priv
                ndesc = ctx->handle_result(priv, ring, sreq->req,
                                           &should_complete, &ret);
                if (ndesc < 0) {
+                       kfree(sreq);
                        dev_err(priv->dev, "failed to handle result (%d)", ndesc);
                        return;
                }
index 5438552bc6d783b57a23763e64c59afb90c340ae..fcc0a606d74839bb46f2af6d0ac66bdb12b3d511 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <crypto/aes.h>
 #include <crypto/skcipher.h>
+#include <crypto/internal/skcipher.h>
 
 #include "safexcel.h"
 
@@ -33,6 +34,10 @@ struct safexcel_cipher_ctx {
        unsigned int key_len;
 };
 
+struct safexcel_cipher_req {
+       bool needs_inv;
+};
+
 static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx,
                                  struct crypto_async_request *async,
                                  struct safexcel_command_desc *cdesc,
@@ -126,9 +131,9 @@ static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
        return 0;
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
-                                 struct crypto_async_request *async,
-                                 bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+                                     struct crypto_async_request *async,
+                                     bool *should_complete, int *ret)
 {
        struct skcipher_request *req = skcipher_request_cast(async);
        struct safexcel_result_desc *rdesc;
@@ -265,7 +270,6 @@ static int safexcel_aes_send(struct crypto_async_request *async,
        spin_unlock_bh(&priv->ring[ring].egress_lock);
 
        request->req = &req->base;
-       ctx->base.handle_result = safexcel_handle_result;
 
        *commands = n_cdesc;
        *results = n_rdesc;
@@ -341,8 +345,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 
        ring = safexcel_select_ring(priv);
        ctx->base.ring = ring;
-       ctx->base.needs_inv = false;
-       ctx->base.send = safexcel_aes_send;
 
        spin_lock_bh(&priv->ring[ring].queue_lock);
        enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -359,6 +361,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
        return ndesc;
 }
 
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+                                 struct crypto_async_request *async,
+                                 bool *should_complete, int *ret)
+{
+       struct skcipher_request *req = skcipher_request_cast(async);
+       struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+       int err;
+
+       if (sreq->needs_inv) {
+               sreq->needs_inv = false;
+               err = safexcel_handle_inv_result(priv, ring, async,
+                                                should_complete, ret);
+       } else {
+               err = safexcel_handle_req_result(priv, ring, async,
+                                                should_complete, ret);
+       }
+
+       return err;
+}
+
 static int safexcel_cipher_send_inv(struct crypto_async_request *async,
                                    int ring, struct safexcel_request *request,
                                    int *commands, int *results)
@@ -368,8 +390,6 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
        struct safexcel_crypto_priv *priv = ctx->priv;
        int ret;
 
-       ctx->base.handle_result = safexcel_handle_inv_result;
-
        ret = safexcel_invalidate_cache(async, &ctx->base, priv,
                                        ctx->base.ctxr_dma, ring, request);
        if (unlikely(ret))
@@ -381,28 +401,46 @@ static int safexcel_cipher_send_inv(struct crypto_async_request *async,
        return 0;
 }
 
+static int safexcel_send(struct crypto_async_request *async,
+                        int ring, struct safexcel_request *request,
+                        int *commands, int *results)
+{
+       struct skcipher_request *req = skcipher_request_cast(async);
+       struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
+       int ret;
+
+       if (sreq->needs_inv)
+               ret = safexcel_cipher_send_inv(async, ring, request,
+                                              commands, results);
+       else
+               ret = safexcel_aes_send(async, ring, request,
+                                       commands, results);
+       return ret;
+}
+
 static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm)
 {
        struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
        struct safexcel_crypto_priv *priv = ctx->priv;
-       struct skcipher_request req;
+       SKCIPHER_REQUEST_ON_STACK(req, __crypto_skcipher_cast(tfm));
+       struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
        struct safexcel_inv_result result = {};
        int ring = ctx->base.ring;
 
-       memset(&req, 0, sizeof(struct skcipher_request));
+       memset(req, 0, sizeof(struct skcipher_request));
 
        /* create invalidation request */
        init_completion(&result.completion);
-       skcipher_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
-                                       safexcel_inv_complete, &result);
+       skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                     safexcel_inv_complete, &result);
 
-       skcipher_request_set_tfm(&req, __crypto_skcipher_cast(tfm));
-       ctx = crypto_tfm_ctx(req.base.tfm);
+       skcipher_request_set_tfm(req, __crypto_skcipher_cast(tfm));
+       ctx = crypto_tfm_ctx(req->base.tfm);
        ctx->base.exit_inv = true;
-       ctx->base.send = safexcel_cipher_send_inv;
+       sreq->needs_inv = true;
 
        spin_lock_bh(&priv->ring[ring].queue_lock);
-       crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+       crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
        spin_unlock_bh(&priv->ring[ring].queue_lock);
 
        if (!priv->ring[ring].need_dequeue)
@@ -424,19 +462,21 @@ static int safexcel_aes(struct skcipher_request *req,
                        enum safexcel_cipher_direction dir, u32 mode)
 {
        struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+       struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
        struct safexcel_crypto_priv *priv = ctx->priv;
        int ret, ring;
 
+       sreq->needs_inv = false;
        ctx->direction = dir;
        ctx->mode = mode;
 
        if (ctx->base.ctxr) {
-               if (ctx->base.needs_inv)
-                       ctx->base.send = safexcel_cipher_send_inv;
+               if (ctx->base.needs_inv) {
+                       sreq->needs_inv = true;
+                       ctx->base.needs_inv = false;
+               }
        } else {
                ctx->base.ring = safexcel_select_ring(priv);
-               ctx->base.send = safexcel_aes_send;
-
                ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
                                                 EIP197_GFP_FLAGS(req->base),
                                                 &ctx->base.ctxr_dma);
@@ -476,6 +516,11 @@ static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
                             alg.skcipher.base);
 
        ctx->priv = tmpl->priv;
+       ctx->base.send = safexcel_send;
+       ctx->base.handle_result = safexcel_handle_result;
+
+       crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm),
+                                   sizeof(struct safexcel_cipher_req));
 
        return 0;
 }
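
The structural change in this driver (repeated below for the hash side) is to stop swapping per-context function pointers for every request and instead install one send/handle_result pair at cra_init time that dispatches on a per-request needs_inv flag, which stays correct when several requests for the same tfm are in flight. A stripped-down sketch of that dispatch shape:

/* Standalone sketch (not kernel code) of flag-based request dispatch. */
#include <stdbool.h>
#include <stdio.h>

struct request {
	bool needs_inv;		/* set once when the request is queued */
};

static int send_invalidation(struct request *req)
{
	printf("invalidate cached context\n");
	return 0;
}

static int send_payload(struct request *req)
{
	printf("process payload\n");
	return 0;
}

/* Single entry point installed once; no function-pointer swapping. */
static int send(struct request *req)
{
	return req->needs_inv ? send_invalidation(req) : send_payload(req);
}

int main(void)
{
	struct request a = { .needs_inv = true };
	struct request b = { .needs_inv = false };

	send(&a);	/* routed to the invalidation path */
	send(&b);	/* routed to the normal path */
	return 0;
}
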
index 74feb622710147baf79d49c25a119be7e640ac7a..0c5a5820b06e53cc9efa776c37822730af1421e4 100644 (file)
@@ -32,9 +32,10 @@ struct safexcel_ahash_req {
        bool last_req;
        bool finish;
        bool hmac;
+       bool needs_inv;
 
        u8 state_sz;    /* expected state size, only set once */
-       u32 state[SHA256_DIGEST_SIZE / sizeof(u32)];
+       u32 state[SHA256_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
 
        u64 len;
        u64 processed;
@@ -119,15 +120,15 @@ static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
        }
 }
 
-static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
-                                 struct crypto_async_request *async,
-                                 bool *should_complete, int *ret)
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+                                     struct crypto_async_request *async,
+                                     bool *should_complete, int *ret)
 {
        struct safexcel_result_desc *rdesc;
        struct ahash_request *areq = ahash_request_cast(async);
        struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
        struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
-       int cache_len, result_sz = sreq->state_sz;
+       int cache_len;
 
        *ret = 0;
 
@@ -148,8 +149,8 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
        spin_unlock_bh(&priv->ring[ring].egress_lock);
 
        if (sreq->finish)
-               result_sz = crypto_ahash_digestsize(ahash);
-       memcpy(sreq->state, areq->result, result_sz);
+               memcpy(areq->result, sreq->state,
+                      crypto_ahash_digestsize(ahash));
 
        dma_unmap_sg(priv->dev, areq->src,
                     sg_nents_for_len(areq->src, areq->nbytes), DMA_TO_DEVICE);
@@ -165,9 +166,9 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
        return 1;
 }
 
-static int safexcel_ahash_send(struct crypto_async_request *async, int ring,
-                              struct safexcel_request *request, int *commands,
-                              int *results)
+static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
+                                  struct safexcel_request *request,
+                                  int *commands, int *results)
 {
        struct ahash_request *areq = ahash_request_cast(async);
        struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
@@ -273,7 +274,7 @@ send_command:
        /* Add the token */
        safexcel_hash_token(first_cdesc, len, req->state_sz);
 
-       ctx->base.result_dma = dma_map_single(priv->dev, areq->result,
+       ctx->base.result_dma = dma_map_single(priv->dev, req->state,
                                              req->state_sz, DMA_FROM_DEVICE);
        if (dma_mapping_error(priv->dev, ctx->base.result_dma)) {
                ret = -EINVAL;
@@ -292,7 +293,6 @@ send_command:
 
        req->processed += len;
        request->req = &areq->base;
-       ctx->base.handle_result = safexcel_handle_result;
 
        *commands = n_cdesc;
        *results = 1;
@@ -374,8 +374,6 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 
        ring = safexcel_select_ring(priv);
        ctx->base.ring = ring;
-       ctx->base.needs_inv = false;
-       ctx->base.send = safexcel_ahash_send;
 
        spin_lock_bh(&priv->ring[ring].queue_lock);
        enq_ret = crypto_enqueue_request(&priv->ring[ring].queue, async);
@@ -392,6 +390,26 @@ static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
        return 1;
 }
 
+static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
+                                 struct crypto_async_request *async,
+                                 bool *should_complete, int *ret)
+{
+       struct ahash_request *areq = ahash_request_cast(async);
+       struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+       int err;
+
+       if (req->needs_inv) {
+               req->needs_inv = false;
+               err = safexcel_handle_inv_result(priv, ring, async,
+                                                should_complete, ret);
+       } else {
+               err = safexcel_handle_req_result(priv, ring, async,
+                                                should_complete, ret);
+       }
+
+       return err;
+}
+
 static int safexcel_ahash_send_inv(struct crypto_async_request *async,
                                   int ring, struct safexcel_request *request,
                                   int *commands, int *results)
@@ -400,7 +418,6 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
        struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
        int ret;
 
-       ctx->base.handle_result = safexcel_handle_inv_result;
        ret = safexcel_invalidate_cache(async, &ctx->base, ctx->priv,
                                        ctx->base.ctxr_dma, ring, request);
        if (unlikely(ret))
@@ -412,28 +429,46 @@ static int safexcel_ahash_send_inv(struct crypto_async_request *async,
        return 0;
 }
 
+static int safexcel_ahash_send(struct crypto_async_request *async,
+                              int ring, struct safexcel_request *request,
+                              int *commands, int *results)
+{
+       struct ahash_request *areq = ahash_request_cast(async);
+       struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+       int ret;
+
+       if (req->needs_inv)
+               ret = safexcel_ahash_send_inv(async, ring, request,
+                                             commands, results);
+       else
+               ret = safexcel_ahash_send_req(async, ring, request,
+                                             commands, results);
+       return ret;
+}
+
 static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 {
        struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
        struct safexcel_crypto_priv *priv = ctx->priv;
-       struct ahash_request req;
+       AHASH_REQUEST_ON_STACK(req, __crypto_ahash_cast(tfm));
+       struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
        struct safexcel_inv_result result = {};
        int ring = ctx->base.ring;
 
-       memset(&req, 0, sizeof(struct ahash_request));
+       memset(req, 0, sizeof(struct ahash_request));
 
        /* create invalidation request */
        init_completion(&result.completion);
-       ahash_request_set_callback(&req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+       ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
                                   safexcel_inv_complete, &result);
 
-       ahash_request_set_tfm(&req, __crypto_ahash_cast(tfm));
-       ctx = crypto_tfm_ctx(req.base.tfm);
+       ahash_request_set_tfm(req, __crypto_ahash_cast(tfm));
+       ctx = crypto_tfm_ctx(req->base.tfm);
        ctx->base.exit_inv = true;
-       ctx->base.send = safexcel_ahash_send_inv;
+       rctx->needs_inv = true;
 
        spin_lock_bh(&priv->ring[ring].queue_lock);
-       crypto_enqueue_request(&priv->ring[ring].queue, &req.base);
+       crypto_enqueue_request(&priv->ring[ring].queue, &req->base);
        spin_unlock_bh(&priv->ring[ring].queue_lock);
 
        if (!priv->ring[ring].need_dequeue)
@@ -481,14 +516,16 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
        struct safexcel_crypto_priv *priv = ctx->priv;
        int ret, ring;
 
-       ctx->base.send = safexcel_ahash_send;
+       req->needs_inv = false;
 
        if (req->processed && ctx->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
                ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
 
        if (ctx->base.ctxr) {
-               if (ctx->base.needs_inv)
-                       ctx->base.send = safexcel_ahash_send_inv;
+               if (ctx->base.needs_inv) {
+                       ctx->base.needs_inv = false;
+                       req->needs_inv = true;
+               }
        } else {
                ctx->base.ring = safexcel_select_ring(priv);
                ctx->base.ctxr = dma_pool_zalloc(priv->context_pool,
@@ -622,6 +659,8 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
                             struct safexcel_alg_template, alg.ahash);
 
        ctx->priv = tmpl->priv;
+       ctx->base.send = safexcel_ahash_send;
+       ctx->base.handle_result = safexcel_handle_result;
 
        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
                                 sizeof(struct safexcel_ahash_req));
index 48de52cf2ecc10f7a1a33b1fa0025e94ec706a19..662e709812cc6f79e973fa10f71ad8f6d1991193 100644 (file)
@@ -1625,6 +1625,7 @@ static int queue_cache_init(void)
                                          CWQ_ENTRY_SIZE, 0, NULL);
        if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
                kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+               queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
                return -ENOMEM;
        }
        return 0;
@@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void)
 {
        kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
        kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+       queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+       queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
 }
 
 static long spu_queue_register_workfn(void *arg)
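
Clearing the cache pointers after kmem_cache_destroy() above makes a later init/destroy cycle (or a failed re-init) safe: kmem_cache_destroy() tolerates NULL, so a stale pointer is the only way to get a double destroy. The generic free-and-clear pattern, sketched in user space:

/* Standalone sketch (not kernel code): clear pointers after freeing so
 * a repeated destroy is a harmless no-op instead of a double free. */
#include <stdio.h>
#include <stdlib.h>

static int *caches[2];

static void caches_destroy(void)
{
	for (int i = 0; i < 2; i++) {
		free(caches[i]);	/* free(NULL) is a no-op, like kmem_cache_destroy(NULL) */
		caches[i] = NULL;	/* the actual fix: drop the stale pointer */
	}
}

int main(void)
{
	caches[0] = malloc(16);
	caches[1] = malloc(16);

	caches_destroy();
	caches_destroy();	/* safe second call thanks to the NULLing */
	printf("ok\n");
	return 0;
}
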
index ec8ac5c4dd84f93e386db9871946c81ea1dbc8d5..055e2e8f985a3fbc5b334f8c0d414c8b6c51e3e3 100644 (file)
 
 #define NO_FURTHER_WRITE_ACTION -1
 
-#ifndef phys_to_page
-#define phys_to_page(x)                pfn_to_page((x) >> PAGE_SHIFT)
-#endif
-
 /**
  * efi_free_all_buff_pages - free all previously allocated buffer pages
  * @cap_info: pointer to current instance of capsule_info structure
@@ -35,7 +31,7 @@
 static void efi_free_all_buff_pages(struct capsule_info *cap_info)
 {
        while (cap_info->index > 0)
-               __free_page(phys_to_page(cap_info->pages[--cap_info->index]));
+               __free_page(cap_info->pages[--cap_info->index]);
 
        cap_info->index = NO_FURTHER_WRITE_ACTION;
 }
@@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info)
 
        cap_info->pages = temp_page;
 
+       temp_page = krealloc(cap_info->phys,
+                            pages_needed * sizeof(phys_addr_t *),
+                            GFP_KERNEL | __GFP_ZERO);
+       if (!temp_page)
+               return -ENOMEM;
+
+       cap_info->phys = temp_page;
+
        return 0;
 }
 
@@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
  **/
 static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
 {
+       bool do_vunmap = false;
        int ret;
 
-       ret = efi_capsule_update(&cap_info->header, cap_info->pages);
+       /*
+        * cap_info->capsule may have been assigned already by a quirk
+        * handler, so only overwrite it if it is NULL
+        */
+       if (!cap_info->capsule) {
+               cap_info->capsule = vmap(cap_info->pages, cap_info->index,
+                                        VM_MAP, PAGE_KERNEL);
+               if (!cap_info->capsule)
+                       return -ENOMEM;
+               do_vunmap = true;
+       }
+
+       ret = efi_capsule_update(cap_info->capsule, cap_info->phys);
+       if (do_vunmap)
+               vunmap(cap_info->capsule);
        if (ret) {
                pr_err("capsule update failed\n");
                return ret;
@@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
                        goto failed;
                }
 
-               cap_info->pages[cap_info->index++] = page_to_phys(page);
+               cap_info->pages[cap_info->index] = page;
+               cap_info->phys[cap_info->index] = page_to_phys(page);
                cap_info->page_bytes_remain = PAGE_SIZE;
+               cap_info->index++;
        } else {
-               page = phys_to_page(cap_info->pages[cap_info->index - 1]);
+               page = cap_info->pages[cap_info->index - 1];
        }
 
        kbuff = kmap(page);
@@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file)
        struct capsule_info *cap_info = file->private_data;
 
        kfree(cap_info->pages);
+       kfree(cap_info->phys);
        kfree(file->private_data);
        file->private_data = NULL;
        return 0;
@@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
                return -ENOMEM;
        }
 
+       cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+       if (!cap_info->phys) {
+               kfree(cap_info->pages);
+               kfree(cap_info);
+               return -ENOMEM;
+       }
+
        file->private_data = cap_info;
 
        return 0;
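
The capsule loader now tracks each page twice: as a struct page * (so the buffer can be vmap()ed into one contiguous virtual mapping) and as a phys_addr_t (what the firmware interface consumes). A sketch of keeping two parallel arrays grown in lockstep, with realloc standing in for krealloc and a plain integer standing in for the physical address:

/* Standalone sketch (not kernel code): two parallel arrays grown in
 * lockstep, mirroring cap_info->pages[] and cap_info->phys[] above. */
#include <stdio.h>
#include <stdlib.h>

struct capsule {
	void **pages;			/* stand-in for struct page * entries */
	unsigned long *phys;		/* stand-in for phys_addr_t entries */
	size_t index;
};

static int capsule_add_page(struct capsule *cap, void *page, unsigned long phys)
{
	void **p = realloc(cap->pages, (cap->index + 1) * sizeof(*p));
	if (!p)
		return -1;
	cap->pages = p;

	unsigned long *q = realloc(cap->phys, (cap->index + 1) * sizeof(*q));
	if (!q)
		return -1;	/* cap->pages stays valid and is freed later */
	cap->phys = q;

	cap->pages[cap->index] = page;
	cap->phys[cap->index] = phys;	/* both views updated together */
	cap->index++;
	return 0;
}

int main(void)
{
	struct capsule cap = { 0 };
	int buf;

	capsule_add_page(&cap, &buf, 0x1000);
	printf("%zu page(s), phys[0]=%#lx\n", cap.index, cap.phys[0]);
	free(cap.pages);
	free(cap.phys);
	return 0;
}
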
index dfcf56ee3c6181fc64d0a074e2f5659fec35a6d8..76861a00bb92c4b449f346433f282da8e6cb82c9 100644 (file)
@@ -522,6 +522,7 @@ static struct of_device_id const bcm_kona_gpio_of_match[] = {
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
                                 irq_hw_number_t hwirq)
@@ -531,7 +532,7 @@ static int bcm_kona_gpio_irq_map(struct irq_domain *d, unsigned int irq,
        ret = irq_set_chip_data(irq, d->host_data);
        if (ret < 0)
                return ret;
-       irq_set_lockdep_class(irq, &gpio_lock_class);
+       irq_set_lockdep_class(irq, &gpio_lock_class, &gpio_request_class);
        irq_set_chip_and_handler(irq, &bcm_gpio_irq_chip, handle_simple_irq);
        irq_set_noprobe(irq);
 
index 545d43a587b7ef1308dc827ffbea2c2dd733b478..bb4f8cf18bd9f6c7e47328aaf2ec1cdd3dfc3d6b 100644 (file)
@@ -327,6 +327,7 @@ static struct brcmstb_gpio_bank *brcmstb_gpio_hwirq_to_bank(
  * category than their parents, so it won't report false recursion.
  */
 static struct lock_class_key brcmstb_gpio_irq_lock_class;
+static struct lock_class_key brcmstb_gpio_irq_request_class;
 
 
 static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
@@ -346,7 +347,8 @@ static int brcmstb_gpio_irq_map(struct irq_domain *d, unsigned int irq,
        ret = irq_set_chip_data(irq, &bank->gc);
        if (ret < 0)
                return ret;
-       irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class);
+       irq_set_lockdep_class(irq, &brcmstb_gpio_irq_lock_class,
+                             &brcmstb_gpio_irq_request_class);
        irq_set_chip_and_handler(irq, &priv->irq_chip, handle_level_irq);
        irq_set_noprobe(irq);
        return 0;
index 23e771dba4c17ab7a497feb37c108f15fe117c79..e85903eddc68ecd5930df5ded45efbc398aa57ca 100644 (file)
@@ -103,8 +103,8 @@ static int gpio_reg_to_irq(struct gpio_chip *gc, unsigned offset)
        struct gpio_reg *r = to_gpio_reg(gc);
        int irq = r->irqs[offset];
 
-       if (irq >= 0 && r->irq.domain)
-               irq = irq_find_mapping(r->irq.domain, irq);
+       if (irq >= 0 && r->irqdomain)
+               irq = irq_find_mapping(r->irqdomain, irq);
 
        return irq;
 }
index 8db47f671708752dbaae420478935f1613847dd3..02fa8fe2292a13608e332d6247e9c0b55870ae98 100644 (file)
@@ -565,6 +565,7 @@ static const struct dev_pm_ops tegra_gpio_pm_ops = {
  * than their parents, so it won't report false recursion.
  */
 static struct lock_class_key gpio_lock_class;
+static struct lock_class_key gpio_request_class;
 
 static int tegra_gpio_probe(struct platform_device *pdev)
 {
@@ -670,7 +671,8 @@ static int tegra_gpio_probe(struct platform_device *pdev)
 
                bank = &tgi->bank_info[GPIO_BANK(gpio)];
 
-               irq_set_lockdep_class(irq, &gpio_lock_class);
+               irq_set_lockdep_class(irq, &gpio_lock_class,
+                                     &gpio_request_class);
                irq_set_chip_data(irq, bank);
                irq_set_chip_and_handler(irq, &tgi->ic, handle_simple_irq);
        }
index 2313af82fad3d4bb3cf7027bec9f25d9e6b5887c..acd59113e08b9cda0fce6930981520a481ec7203 100644 (file)
@@ -139,7 +139,7 @@ static int xgene_gpio_sb_to_irq(struct gpio_chip *gc, u32 gpio)
 
 static int xgene_gpio_sb_domain_activate(struct irq_domain *d,
                                         struct irq_data *irq_data,
-                                        bool early)
+                                        bool reserve)
 {
        struct xgene_gpio_sb *priv = d->host_data;
        u32 gpio = HWIRQ_TO_GPIO(priv, irq_data->hwirq);
index eb4528c87c0b3977420a2108c7feaaf9b2a95869..d6f3d9ee1350e422e3f373427d55d6ce8b6be565 100644 (file)
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
        }
 
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));
 
        acpi_gpiochip_request_regions(acpi_gpio);
        acpi_gpiochip_scan_gpios(acpi_gpio);
index 27f383bda7d9621322a3340d9def92354d21d7a3..f748aa3e77f72000b357718d5c955abba38c5c07 100644 (file)
 /**
  * devprop_gpiochip_set_names - Set GPIO line names using device properties
  * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
  * names belong to the underlying firmware node and should not be released
  * by the caller.
  */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode)
 {
        struct gpio_device *gdev = chip->gpiodev;
        const char **names;
        int ret, i;
 
-       if (!chip->parent) {
-               dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-               return;
-       }
-
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                NULL, 0);
        if (ret < 0)
                return;
 
        if (ret != gdev->ngpio) {
-               dev_warn(chip->parent,
+               dev_warn(&gdev->dev,
                         "names %d do not match number of GPIOs %d\n", ret,
                         gdev->ngpio);
                return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
        if (!names)
                return;
 
-       ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                names, gdev->ngpio);
        if (ret < 0) {
-               dev_warn(chip->parent, "failed to read GPIO line names\n");
+               dev_warn(&gdev->dev, "failed to read GPIO line names\n");
                kfree(names);
                return;
        }
index e0d59e61b52fa6aa53e7830ae1ab1c59ceec2453..72a0695d2ac3a3d3217462ff4ea85147921f5dcd 100644 (file)
@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)
 
        /* If the chip defines names itself, these take precedence */
        if (!chip->names)
-               devprop_gpiochip_set_names(chip);
+               devprop_gpiochip_set_names(chip,
+                                          of_fwnode_handle(chip->of_node));
 
        of_node_get(chip->of_node);
 
index aad84a6306c4e5ddbc3364d58add85fa1b1e2583..14532d9576e4239ff60a193bec13a1a819c338b8 100644 (file)
@@ -73,7 +73,8 @@ LIST_HEAD(gpio_devices);
 
 static void gpiochip_free_hogs(struct gpio_chip *chip);
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                               struct lock_class_key *key);
+                               struct lock_class_key *lock_key,
+                               struct lock_class_key *request_key);
 static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
 static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
 static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1100,7 +1101,8 @@ static void gpiochip_setup_devs(void)
 }
 
 int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-                              struct lock_class_key *key)
+                              struct lock_class_key *lock_key,
+                              struct lock_class_key *request_key)
 {
        unsigned long   flags;
        int             status = 0;
@@ -1246,7 +1248,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
        if (status)
                goto err_remove_from_list;
 
-       status = gpiochip_add_irqchip(chip, key);
+       status = gpiochip_add_irqchip(chip, lock_key, request_key);
        if (status)
                goto err_remove_chip;
 
@@ -1632,7 +1634,7 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
         * This lock class tells lockdep that GPIO irqs are in a different
         * category than their parents, so it won't report false recursion.
         */
-       irq_set_lockdep_class(irq, chip->irq.lock_key);
+       irq_set_lockdep_class(irq, chip->irq.lock_key, chip->irq.request_key);
        irq_set_chip_and_handler(irq, chip->irq.chip, chip->irq.handler);
        /* Chips that use nested thread handlers have them marked */
        if (chip->irq.threaded)
@@ -1712,10 +1714,12 @@ static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 /**
  * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip
  * @gpiochip: the GPIO chip to add the IRQ chip to
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  */
 static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                               struct lock_class_key *lock_key)
+                               struct lock_class_key *lock_key,
+                               struct lock_class_key *request_key)
 {
        struct irq_chip *irqchip = gpiochip->irq.chip;
        const struct irq_domain_ops *ops;
@@ -1753,6 +1757,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
        gpiochip->to_irq = gpiochip_to_irq;
        gpiochip->irq.default_type = type;
        gpiochip->irq.lock_key = lock_key;
+       gpiochip->irq.request_key = request_key;
 
        if (gpiochip->irq.domain_ops)
                ops = gpiochip->irq.domain_ops;
@@ -1850,7 +1855,8 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
  * @type: the default type for IRQs on this irqchip, pass IRQ_TYPE_NONE
  * to have the core avoid setting up any default type in the hardware.
  * @threaded: whether this irqchip uses a nested thread handler
- * @lock_key: lockdep class
+ * @lock_key: lockdep class for IRQ lock
+ * @request_key: lockdep class for IRQ request
  *
  * This function closely associates a certain irqchip with a certain
  * gpiochip, providing an irq domain to translate the local IRQs to
@@ -1872,7 +1878,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
                             irq_flow_handler_t handler,
                             unsigned int type,
                             bool threaded,
-                            struct lock_class_key *lock_key)
+                            struct lock_class_key *lock_key,
+                            struct lock_class_key *request_key)
 {
        struct device_node *of_node;
 
@@ -1913,6 +1920,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
        gpiochip->irq.default_type = type;
        gpiochip->to_irq = gpiochip_to_irq;
        gpiochip->irq.lock_key = lock_key;
+       gpiochip->irq.request_key = request_key;
        gpiochip->irq.domain = irq_domain_add_simple(of_node,
                                        gpiochip->ngpio, first_irq,
                                        &gpiochip_domain_ops, gpiochip);
@@ -1940,7 +1948,8 @@ EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_key);
 #else /* CONFIG_GPIOLIB_IRQCHIP */
 
 static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
-                                      struct lock_class_key *key)
+                                      struct lock_class_key *lock_key,
+                                      struct lock_class_key *request_key)
 {
        return 0;
 }
@@ -2883,6 +2892,27 @@ void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 
+/**
+ * gpiod_set_value_nocheck() - set a GPIO line value without checking
+ * @desc: the descriptor to set the value on
+ * @value: value to set
+ *
+ * This sets the value of a GPIO line backing a descriptor, applying
+ * different semantic quirks like active low and open drain/source
+ * handling.
+ */
+static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value)
+{
+       if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
+               value = !value;
+       if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
+               gpio_set_open_drain_value_commit(desc, value);
+       else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
+               gpio_set_open_source_value_commit(desc, value);
+       else
+               gpiod_set_raw_value_commit(desc, value);
+}
+
 /**
  * gpiod_set_value() - assign a gpio's value
  * @desc: gpio whose value will be assigned
@@ -2897,16 +2927,8 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value);
 void gpiod_set_value(struct gpio_desc *desc, int value)
 {
        VALIDATE_DESC_VOID(desc);
-       /* Should be using gpiod_set_value_cansleep() */
        WARN_ON(desc->gdev->chip->can_sleep);
-       if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-               value = !value;
-       if (test_bit(FLAG_OPEN_DRAIN, &desc->flags))
-               gpio_set_open_drain_value_commit(desc, value);
-       else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags))
-               gpio_set_open_source_value_commit(desc, value);
-       else
-               gpiod_set_raw_value_commit(desc, value);
+       gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value);
 
@@ -3234,9 +3256,7 @@ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 {
        might_sleep_if(extra_checks);
        VALIDATE_DESC_VOID(desc);
-       if (test_bit(FLAG_ACTIVE_LOW, &desc->flags))
-               value = !value;
-       gpiod_set_raw_value_commit(desc, value);
+       gpiod_set_value_nocheck(desc, value);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
index af48322839c3d6004ee16a32591f1f434d364fc8..6c44d165213910a259ee94c85731ab4d1ca68431 100644 (file)
@@ -228,7 +228,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
        return desc - &desc->gdev->descs[0];
 }
 
-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+                               const struct fwnode_handle *fwnode);
 
 /* With descriptor prefix */
 
index da43813d67a4ad56ddecb79ac0a749afe29abc43..5aeb5f8816f3b9a68666cf57372cddeb12c2b36a 100644 (file)
@@ -2467,7 +2467,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
                                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                                  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                                  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
-                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */
+                                 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
                                  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
                                  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
                amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
index f71fe6d2ddda795fd2fb914740b75845893c1298..bb5fa895fb6446097580ce229ef23dc473f979af 100644 (file)
@@ -2336,7 +2336,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                       const struct dm_connector_state *dm_state)
 {
        struct drm_display_mode *preferred_mode = NULL;
-       const struct drm_connector *drm_connector;
+       struct drm_connector *drm_connector;
        struct dc_stream_state *stream = NULL;
        struct drm_display_mode mode = *drm_mode;
        bool native_mode_found = false;
@@ -2355,11 +2355,13 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
        if (!aconnector->dc_sink) {
                /*
-                * Exclude MST from creating fake_sink
-                * TODO: need to enable MST into fake_sink feature
+                * Create a dc_sink when necessary for MST;
+                * don't apply fake_sink to MST
                 */
-               if (aconnector->mst_port)
-                       goto stream_create_fail;
+               if (aconnector->mst_port) {
+                       dm_dp_mst_dc_sink_create(drm_connector);
+                       goto mst_dc_sink_create_done;
+               }
 
                if (create_fake_sink(aconnector))
                        goto stream_create_fail;
@@ -2410,6 +2412,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 stream_create_fail:
 dm_state_null:
 drm_connector_null:
+mst_dc_sink_create_done:
        return stream;
 }
 
index 117521c6a6ed26213c60ec5316df64b66eeaba12..0230250a1164bb01b41f3a2b22011960909e14bb 100644 (file)
@@ -189,6 +189,8 @@ struct amdgpu_dm_connector {
        struct mutex hpd_lock;
 
        bool fake_enable;
+
+       bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
index f8efb98b1fa72f86ecbec4c568a653af164c7daa..638c2c2b5cd79069e7312b7d7f23a28b6f5eb3b6 100644 (file)
@@ -185,6 +185,42 @@ static int dm_connector_update_modes(struct drm_connector *connector,
        return ret;
 }
 
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
+{
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+       struct edid *edid;
+       struct dc_sink *dc_sink;
+       struct dc_sink_init_data init_params = {
+                       .link = aconnector->dc_link,
+                       .sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
+
+       edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
+
+       if (!edid) {
+               drm_mode_connector_update_edid_property(
+                       &aconnector->base,
+                       NULL);
+               return;
+       }
+
+       aconnector->edid = edid;
+
+       dc_sink = dc_link_add_remote_sink(
+               aconnector->dc_link,
+               (uint8_t *)aconnector->edid,
+               (aconnector->edid->extensions + 1) * EDID_LENGTH,
+               &init_params);
+
+       dc_sink->priv = aconnector;
+       aconnector->dc_sink = dc_sink;
+
+       amdgpu_dm_add_sink_to_freesync_module(
+                       connector, aconnector->edid);
+
+       drm_mode_connector_update_edid_property(
+                                       &aconnector->base, aconnector->edid);
+}
+
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
        struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
@@ -311,6 +347,7 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
                        drm_mode_connector_set_path_property(connector, pathprop);
 
                        drm_connector_list_iter_end(&conn_iter);
+                       aconnector->mst_connected = true;
                        return &aconnector->base;
                }
        }
@@ -363,6 +400,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
         */
        amdgpu_dm_connector_funcs_reset(connector);
 
+       aconnector->mst_connected = true;
+
        DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
                        aconnector, connector->base.id, aconnector->mst_port);
 
@@ -394,6 +433,8 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        drm_mode_connector_update_edid_property(
                        &aconnector->base,
                        NULL);
+
+       aconnector->mst_connected = false;
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -404,10 +445,18 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
        drm_kms_helper_hotplug_event(dev);
 }
 
+static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
+{
+       mutex_lock(&connector->dev->mode_config.mutex);
+       drm_mode_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
+       mutex_unlock(&connector->dev->mode_config.mutex);
+}
+
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
        struct drm_device *dev = connector->dev;
        struct amdgpu_device *adev = dev->dev_private;
+       struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
        if (adev->mode_info.rfbdev)
                drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -416,6 +465,8 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 
        drm_connector_register(connector);
 
+       if (aconnector->mst_connected)
+               dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
index 2da851b40042aee9b79eb2c666d45c0f5061fee0..8cf51da26657e29e72062b34aeed7e5d827f9e21 100644 (file)
@@ -31,5 +31,6 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
                                       struct amdgpu_dm_connector *aconnector);
+void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
index 3dce35e66b0917d2ec93420063b3477443788302..b142629a105841b603501291800e45b9ade30591 100644 (file)
@@ -900,6 +900,15 @@ bool dcn_validate_bandwidth(
                        v->override_vta_ps[input_idx] = pipe->plane_res.scl_data.taps.v_taps;
                        v->override_hta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.h_taps_c;
                        v->override_vta_pschroma[input_idx] = pipe->plane_res.scl_data.taps.v_taps_c;
+                       /*
+                        * The spreadsheet doesn't handle taps_c of one
+                        * properly; force chroma to always be scaled so
+                        * bandwidth validation passes.
+                        */
+                       if (v->override_hta_pschroma[input_idx] == 1)
+                               v->override_hta_pschroma[input_idx] = 2;
+                       if (v->override_vta_pschroma[input_idx] == 1)
+                               v->override_vta_pschroma[input_idx] = 2;
                        v->source_scan[input_idx] = (pipe->plane_state->rotation % 2) ? dcn_bw_vert : dcn_bw_hor;
                }
                if (v->is_line_buffer_bpp_fixed == dcn_bw_yes)
index e27ed4a45265290690604b10e6d4df4fbee77514..42a111b9505dcb5190437a381c7dba8fda444719 100644 (file)
@@ -1801,7 +1801,7 @@ static void disable_link(struct dc_link *link, enum signal_type signal)
                link->link_enc->funcs->disable_output(link->link_enc, signal, link);
 }
 
-bool dp_active_dongle_validate_timing(
+static bool dp_active_dongle_validate_timing(
                const struct dc_crtc_timing *timing,
                const struct dc_dongle_caps *dongle_caps)
 {
@@ -1833,6 +1833,8 @@ bool dp_active_dongle_validate_timing(
        /* Check Color Depth and Pixel Clock */
        if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420)
                required_pix_clk /= 2;
+       else if (timing->pixel_encoding == PIXEL_ENCODING_YCBCR422)
+               required_pix_clk = required_pix_clk * 2 / 3;
 
        switch (timing->display_color_depth) {
        case COLOR_DEPTH_666:
index 07ff8d2faf3f4630276d9241092f605274375cda..d844fadcd56f048739e374cb8d534cba10d235b3 100644 (file)
@@ -2866,16 +2866,19 @@ static void dce110_apply_ctx_for_surface(
                int num_planes,
                struct dc_state *context)
 {
-       int i, be_idx;
+       int i;
 
        if (num_planes == 0)
                return;
 
-       be_idx = -1;
        for (i = 0; i < dc->res_pool->pipe_count; i++) {
-               if (stream == context->res_ctx.pipe_ctx[i].stream) {
-                       be_idx = context->res_ctx.pipe_ctx[i].stream_res.tg->inst;
-                       break;
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if (stream == pipe_ctx->stream) {
+                       if (!pipe_ctx->top_pipe &&
+                               (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                               dc->hwss.pipe_control_lock(dc, pipe_ctx, true);
                }
        }
 
@@ -2895,9 +2898,22 @@ static void dce110_apply_ctx_for_surface(
                                        context->stream_count);
 
                dce110_program_front_end_for_pipe(dc, pipe_ctx);
+
+               dc->hwss.update_plane_addr(dc, pipe_ctx);
+
                program_surface_visibility(dc, pipe_ctx);
 
        }
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
+
+               if ((stream == pipe_ctx->stream) &&
+                       (!pipe_ctx->top_pipe) &&
+                       (pipe_ctx->plane_state || old_pipe_ctx->plane_state))
+                       dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
+       }
 }
 
 static void dce110_power_down_fe(struct dc *dc, int fe_idx)
index 74e7c82bdc76a71080d8e22dd6db90179ffee54e..a9d55d0dd69e009f4a31038c9529c86c05904ec7 100644 (file)
@@ -159,11 +159,10 @@ bool dpp_get_optimal_number_of_taps(
                        scl_data->taps.h_taps = 1;
                if (IDENTITY_RATIO(scl_data->ratios.vert))
                        scl_data->taps.v_taps = 1;
-               /*
-                * Spreadsheet doesn't handle taps_c is one properly,
-                * need to force Chroma to always be scaled to pass
-                * bandwidth validation.
-                */
+               if (IDENTITY_RATIO(scl_data->ratios.horz_c))
+                       scl_data->taps.h_taps_c = 1;
+               if (IDENTITY_RATIO(scl_data->ratios.vert_c))
+                       scl_data->taps.v_taps_c = 1;
        }
 
        return true;
index a9782b1aba478c202df3ab70e824db04e4ae1003..34daf895f848e848699b1a33668e4f7f79f3207b 100644 (file)
@@ -1360,7 +1360,7 @@ void dpp1_cm_set_output_csc_adjustment(
 
 void dpp1_cm_set_output_csc_default(
                struct dpp *dpp_base,
-               const struct default_adjustment *default_adjust);
+               enum dc_color_space colorspace);
 
 void dpp1_cm_set_gamut_remap(
        struct dpp *dpp,
index 40627c244bf5bc49b3615cdc9e7cd3605029bb43..ed1216b53465db99eb3389611b2892350e509123 100644 (file)
@@ -225,14 +225,13 @@ void dpp1_cm_set_gamut_remap(
 
 void dpp1_cm_set_output_csc_default(
                struct dpp *dpp_base,
-               const struct default_adjustment *default_adjust)
+               enum dc_color_space colorspace)
 {
 
        struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
        uint32_t ocsc_mode = 0;
 
-       if (default_adjust != NULL) {
-               switch (default_adjust->out_color_space) {
+       switch (colorspace) {
                case COLOR_SPACE_SRGB:
                case COLOR_SPACE_2020_RGB_FULLRANGE:
                        ocsc_mode = 0;
@@ -253,7 +252,6 @@ void dpp1_cm_set_output_csc_default(
                case COLOR_SPACE_UNKNOWN:
                default:
                        break;
-               }
        }
 
        REG_SET(CM_OCSC_CONTROL, 0, CM_OCSC_MODE, ocsc_mode);
index 961ad5c3b45412062e85255074cc374e9fe21763..05dc01e54531755ebcc9034d42810b2c68537364 100644 (file)
@@ -2097,6 +2097,8 @@ static void program_csc_matrix(struct pipe_ctx *pipe_ctx,
                        tbl_entry.color_space = color_space;
                        //tbl_entry.regval = matrix;
                        pipe_ctx->plane_res.dpp->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.dpp, &tbl_entry);
+       } else {
+               pipe_ctx->plane_res.dpp->funcs->opp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace);
        }
 }
 static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx)
index 83a68460edcda8372a5474f6ea0e95697f84679e..9420dfb94d39ef5da48d7718236863945cfa9448 100644 (file)
@@ -64,7 +64,7 @@ struct dpp_funcs {
 
        void (*opp_set_csc_default)(
                struct dpp *dpp,
-               const struct default_adjustment *default_adjust);
+               enum dc_color_space colorspace);
 
        void (*opp_set_csc_adjustment)(
                struct dpp *dpp,
index 2e065facdce74cae05cdc7b86fdcf9643d886ba0..a0f4d2a2a481085095dc7a76281e636b1161db7e 100644 (file)
@@ -168,16 +168,23 @@ static void armada_drm_crtc_update(struct armada_crtc *dcrtc)
 void armada_drm_plane_calc_addrs(u32 *addrs, struct drm_framebuffer *fb,
        int x, int y)
 {
+       const struct drm_format_info *format = fb->format;
+       unsigned int num_planes = format->num_planes;
        u32 addr = drm_fb_obj(fb)->dev_addr;
-       int num_planes = fb->format->num_planes;
        int i;
 
        if (num_planes > 3)
                num_planes = 3;
 
-       for (i = 0; i < num_planes; i++)
+       addrs[0] = addr + fb->offsets[0] + y * fb->pitches[0] +
+                  x * format->cpp[0];
+
+       y /= format->vsub;
+       x /= format->hsub;
+
+       for (i = 1; i < num_planes; i++)
                addrs[i] = addr + fb->offsets[i] + y * fb->pitches[i] +
-                            x * fb->format->cpp[i];
+                            x * format->cpp[i];
        for (; i < 3; i++)
                addrs[i] = 0;
 }
@@ -744,15 +751,14 @@ void armada_drm_crtc_plane_disable(struct armada_crtc *dcrtc,
        if (plane->fb)
                drm_framebuffer_put(plane->fb);
 
-       /* Power down the Y/U/V FIFOs */
-       sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
-
        /* Power down most RAMs and FIFOs if this is the primary plane */
        if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
-               sram_para1 |= CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
-                             CFG_PDWN32x32 | CFG_PDWN64x66;
+               sram_para1 = CFG_PDWN256x32 | CFG_PDWN256x24 | CFG_PDWN256x8 |
+                            CFG_PDWN32x32 | CFG_PDWN64x66;
                dma_ctrl0_mask = CFG_GRA_ENA;
        } else {
+               /* Power down the Y/U/V FIFOs */
+               sram_para1 = CFG_PDWN16x66 | CFG_PDWN32x66;
                dma_ctrl0_mask = CFG_DMA_ENA;
        }
 
@@ -1225,17 +1231,13 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 
        ret = devm_request_irq(dev, irq, armada_drm_irq, 0, "armada_drm_crtc",
                               dcrtc);
-       if (ret < 0) {
-               kfree(dcrtc);
-               return ret;
-       }
+       if (ret < 0)
+               goto err_crtc;
 
        if (dcrtc->variant->init) {
                ret = dcrtc->variant->init(dcrtc, dev);
-               if (ret) {
-                       kfree(dcrtc);
-                       return ret;
-               }
+               if (ret)
+                       goto err_crtc;
        }
 
        /* Ensure AXI pipeline is enabled */
@@ -1246,13 +1248,15 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
        dcrtc->crtc.port = port;
 
        primary = kzalloc(sizeof(*primary), GFP_KERNEL);
-       if (!primary)
-               return -ENOMEM;
+       if (!primary) {
+               ret = -ENOMEM;
+               goto err_crtc;
+       }
 
        ret = armada_drm_plane_init(primary);
        if (ret) {
                kfree(primary);
-               return ret;
+               goto err_crtc;
        }
 
        ret = drm_universal_plane_init(drm, &primary->base, 0,
@@ -1263,7 +1267,7 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
                                       DRM_PLANE_TYPE_PRIMARY, NULL);
        if (ret) {
                kfree(primary);
-               return ret;
+               goto err_crtc;
        }
 
        ret = drm_crtc_init_with_planes(drm, &dcrtc->crtc, &primary->base, NULL,
@@ -1282,6 +1286,9 @@ static int armada_drm_crtc_create(struct drm_device *drm, struct device *dev,
 
 err_crtc_init:
        primary->base.funcs->destroy(&primary->base);
+err_crtc:
+       kfree(dcrtc);
+
        return ret;
 }
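
The hunks above replace the per-failure "kfree(dcrtc); return ret;" pairs with jumps to a single err_crtc label, the usual kernel unwind idiom. A minimal standalone sketch of the pattern, with illustrative names rather than the driver's types:

#include <stdlib.h>

struct widget { int *buf; };

static struct widget *widget_create(void)
{
	struct widget *w = malloc(sizeof(*w));

	if (!w)
		return NULL;

	w->buf = malloc(64 * sizeof(*w->buf));
	if (!w->buf)
		goto err_widget;	/* every later failure jumps here too */

	return w;

err_widget:
	free(w);	/* one place releases everything allocated so far */
	return NULL;
}
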
 
index bab11f48357591325d5bab2f80de34981f4d5482..bfd3514fbe9b3e8ccc96e23e7d1118e401f90a8a 100644 (file)
@@ -42,6 +42,8 @@ struct armada_plane_work {
 };
 
 struct armada_plane_state {
+       u16 src_x;
+       u16 src_y;
        u32 src_hw;
        u32 dst_hw;
        u32 dst_yx;
index b411b608821a555320a16df12fe56a524a3c4dae..aba947696178e1a4e4e0e3953f506e366865f45b 100644 (file)
@@ -99,6 +99,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
 {
        struct armada_ovl_plane *dplane = drm_to_armada_ovl_plane(plane);
        struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
+       const struct drm_format_info *format;
        struct drm_rect src = {
                .x1 = src_x,
                .y1 = src_y,
@@ -117,7 +118,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
        };
        uint32_t val, ctrl0;
        unsigned idx = 0;
-       bool visible;
+       bool visible, fb_changed;
        int ret;
 
        trace_armada_ovl_plane_update(plane, crtc, fb,
@@ -138,6 +139,18 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
        if (!visible)
                ctrl0 &= ~CFG_DMA_ENA;
 
+       /*
+        * Shifting a YUV packed format image by one pixel causes the U/V
+        * planes to swap.  Compensate for it by also toggling the UV swap.
+        */
+       format = fb->format;
+       if (format->num_planes == 1 && src.x1 >> 16 & (format->hsub - 1))
+               ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
+
+       fb_changed = plane->fb != fb ||
+                    dplane->base.state.src_x != src.x1 >> 16 ||
+                    dplane->base.state.src_y != src.y1 >> 16;
+
        if (!dcrtc->plane) {
                dcrtc->plane = plane;
                armada_ovl_update_attr(&dplane->prop, dcrtc);
@@ -145,7 +158,7 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
 
        /* FIXME: overlay on an interlaced display */
        /* Just updating the position/size? */
-       if (plane->fb == fb && dplane->base.state.ctrl0 == ctrl0) {
+       if (!fb_changed && dplane->base.state.ctrl0 == ctrl0) {
                val = (drm_rect_height(&src) & 0xffff0000) |
                      drm_rect_width(&src) >> 16;
                dplane->base.state.src_hw = val;
@@ -169,9 +182,8 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
        if (armada_drm_plane_work_wait(&dplane->base, HZ / 25) == 0)
                armada_drm_plane_work_cancel(dcrtc, &dplane->base);
 
-       if (plane->fb != fb) {
-               u32 addrs[3], pixel_format;
-               int num_planes, hsub;
+       if (fb_changed) {
+               u32 addrs[3];
 
                /*
                 * Take a reference on the new framebuffer - we want to
@@ -182,23 +194,11 @@ armada_ovl_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
                if (plane->fb)
                        armada_ovl_retire_fb(dplane, plane->fb);
 
-               src_y = src.y1 >> 16;
-               src_x = src.x1 >> 16;
+               dplane->base.state.src_y = src_y = src.y1 >> 16;
+               dplane->base.state.src_x = src_x = src.x1 >> 16;
 
                armada_drm_plane_calc_addrs(addrs, fb, src_x, src_y);
 
-               pixel_format = fb->format->format;
-               hsub = drm_format_horz_chroma_subsampling(pixel_format);
-               num_planes = fb->format->num_planes;
-
-               /*
-                * Annoyingly, shifting a YUYV-format image by one pixel
-                * causes the U/V planes to toggle.  Toggle the UV swap.
-                * (Unfortunately, this causes momentary colour flickering.)
-                */
-               if (src_x & (hsub - 1) && num_planes == 1)
-                       ctrl0 ^= CFG_DMA_MOD(CFG_SWAPUV);
-
                armada_reg_queue_set(dplane->vbl.regs, idx, addrs[0],
                                     LCD_SPU_DMA_START_ADDR_Y0);
                armada_reg_queue_set(dplane->vbl.regs, idx, addrs[1],
index 59849f02e2ad5bb74559ea85fbeb6fc1dd97bde6..1402c0e71b03d18866139056b12f0d5fd84b6afb 100644 (file)
@@ -220,17 +220,6 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
 
        mutex_lock(&dev->mode_config.idr_mutex);
 
-       /* Insert the new lessee into the tree */
-       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
-       if (id < 0) {
-               error = id;
-               goto out_lessee;
-       }
-
-       lessee->lessee_id = id;
-       lessee->lessor = drm_master_get(lessor);
-       list_add_tail(&lessee->lessee_list, &lessor->lessees);
-
        idr_for_each_entry(leases, entry, object) {
                error = 0;
                if (!idr_find(&dev->mode_config.crtc_idr, object))
@@ -246,6 +235,17 @@ static struct drm_master *drm_lease_create(struct drm_master *lessor, struct idr
                }
        }
 
+       /* Insert the new lessee into the tree */
+       id = idr_alloc(&(drm_lease_owner(lessor)->lessee_idr), lessee, 1, 0, GFP_KERNEL);
+       if (id < 0) {
+               error = id;
+               goto out_lessee;
+       }
+
+       lessee->lessee_id = id;
+       lessee->lessor = drm_master_get(lessor);
+       list_add_tail(&lessee->lessee_list, &lessor->lessees);
+
        /* Move the leases over */
        lessee->leases = *leases;
        DRM_DEBUG_LEASE("new lessee %d %p, lessor %d %p\n", lessee->lessee_id, lessee, lessor->lessee_id, lessor);
index 37a93cdffb4ad0e7986a634df4d70ccc3fef286e..2c90519576a3e8b63a4c8361f18672db853ebcec 100644 (file)
@@ -558,11 +558,10 @@ int drm_plane_check_pixel_format(const struct drm_plane *plane, u32 format)
 }
 
 /*
- * setplane_internal - setplane handler for internal callers
+ * __setplane_internal - setplane handler for internal callers
  *
- * Note that we assume an extra reference has already been taken on fb.  If the
- * update fails, this reference will be dropped before return; if it succeeds,
- * the previous framebuffer (if any) will be unreferenced instead.
+ * This function will take a reference on the new fb for the plane
+ * on success.
  *
  * src_{x,y,w,h} are provided in 16.16 fixed point format
  */
@@ -630,14 +629,12 @@ static int __setplane_internal(struct drm_plane *plane,
        if (!ret) {
                plane->crtc = crtc;
                plane->fb = fb;
-               fb = NULL;
+               drm_framebuffer_get(plane->fb);
        } else {
                plane->old_fb = NULL;
        }
 
 out:
-       if (fb)
-               drm_framebuffer_put(fb);
        if (plane->old_fb)
                drm_framebuffer_put(plane->old_fb);
        plane->old_fb = NULL;
@@ -685,6 +682,7 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
        struct drm_plane *plane;
        struct drm_crtc *crtc = NULL;
        struct drm_framebuffer *fb = NULL;
+       int ret;
 
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                return -EINVAL;
@@ -717,15 +715,16 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
                }
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
-       return setplane_internal(plane, crtc, fb,
-                                plane_req->crtc_x, plane_req->crtc_y,
-                                plane_req->crtc_w, plane_req->crtc_h,
-                                plane_req->src_x, plane_req->src_y,
-                                plane_req->src_w, plane_req->src_h);
+       ret = setplane_internal(plane, crtc, fb,
+                               plane_req->crtc_x, plane_req->crtc_y,
+                               plane_req->crtc_w, plane_req->crtc_h,
+                               plane_req->src_x, plane_req->src_y,
+                               plane_req->src_w, plane_req->src_h);
+
+       if (fb)
+               drm_framebuffer_put(fb);
+
+       return ret;
 }
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
@@ -788,13 +787,12 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
                src_h = fb->height << 16;
        }
 
-       /*
-        * setplane_internal will take care of deref'ing either the old or new
-        * framebuffer depending on success.
-        */
        ret = __setplane_internal(crtc->cursor, crtc, fb,
-                               crtc_x, crtc_y, crtc_w, crtc_h,
-                               0, 0, src_w, src_h, ctx);
+                                 crtc_x, crtc_y, crtc_w, crtc_h,
+                                 0, 0, src_w, src_h, ctx);
+
+       if (fb)
+               drm_framebuffer_put(fb);
 
        /* Update successful; save new cursor position, if necessary */
        if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
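
The reference-counting contract changes here: __setplane_internal() now takes its own reference on success instead of consuming the caller's, and both callers drop their lookup reference unconditionally after the call. A toy model of that ownership rule (illustrative types, not the DRM API):

#include <assert.h>
#include <stddef.h>

struct fb { int refs; };

static void fb_get(struct fb *fb) { fb->refs++; }
static void fb_put(struct fb *fb) { fb->refs--; }

/* On success the callee takes its own reference; on failure it takes none. */
static int set_plane(struct fb **slot, struct fb *fb, int fail)
{
	if (fail)
		return -1;

	fb_get(fb);
	*slot = fb;
	return 0;
}

int main(void)
{
	struct fb fb = { .refs = 1 };	/* caller's lookup reference */
	struct fb *slot = NULL;

	set_plane(&slot, &fb, 0);
	fb_put(&fb);			/* caller always drops its own ref */
	assert(fb.refs == 1);		/* the plane still holds one */
	return 0;
}
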
index f776fc1cc543abf8e752a5133aaf1ca63fb2d8ff..cb4d09c70fd44647f30b6d10244f25e90db0835f 100644 (file)
@@ -369,40 +369,26 @@ static const struct file_operations drm_syncobj_file_fops = {
        .release = drm_syncobj_file_release,
 };
 
-static int drm_syncobj_alloc_file(struct drm_syncobj *syncobj)
-{
-       struct file *file = anon_inode_getfile("syncobj_file",
-                                              &drm_syncobj_file_fops,
-                                              syncobj, 0);
-       if (IS_ERR(file))
-               return PTR_ERR(file);
-
-       drm_syncobj_get(syncobj);
-       if (cmpxchg(&syncobj->file, NULL, file)) {
-               /* lost the race */
-               fput(file);
-       }
-
-       return 0;
-}
-
 int drm_syncobj_get_fd(struct drm_syncobj *syncobj, int *p_fd)
 {
-       int ret;
+       struct file *file;
        int fd;
 
        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
                return fd;
 
-       if (!syncobj->file) {
-               ret = drm_syncobj_alloc_file(syncobj);
-               if (ret) {
-                       put_unused_fd(fd);
-                       return ret;
-               }
+       file = anon_inode_getfile("syncobj_file",
+                                 &drm_syncobj_file_fops,
+                                 syncobj, 0);
+       if (IS_ERR(file)) {
+               put_unused_fd(fd);
+               return PTR_ERR(file);
        }
-       fd_install(fd, syncobj->file);
+
+       drm_syncobj_get(syncobj);
+       fd_install(fd, file);
+
        *p_fd = fd;
        return 0;
 }
@@ -422,31 +408,24 @@ static int drm_syncobj_handle_to_fd(struct drm_file *file_private,
        return ret;
 }
 
-static struct drm_syncobj *drm_syncobj_fdget(int fd)
-{
-       struct file *file = fget(fd);
-
-       if (!file)
-               return NULL;
-       if (file->f_op != &drm_syncobj_file_fops)
-               goto err;
-
-       return file->private_data;
-err:
-       fput(file);
-       return NULL;
-};
-
 static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
                                    int fd, u32 *handle)
 {
-       struct drm_syncobj *syncobj = drm_syncobj_fdget(fd);
+       struct drm_syncobj *syncobj;
+       struct file *file;
        int ret;
 
-       if (!syncobj)
+       file = fget(fd);
+       if (!file)
                return -EINVAL;
 
+       if (file->f_op != &drm_syncobj_file_fops) {
+               fput(file);
+               return -EINVAL;
+       }
+
        /* take a reference to put in the idr */
+       syncobj = file->private_data;
        drm_syncobj_get(syncobj);
 
        idr_preload(GFP_KERNEL);
@@ -455,12 +434,14 @@ static int drm_syncobj_fd_to_handle(struct drm_file *file_private,
        spin_unlock(&file_private->syncobj_table_lock);
        idr_preload_end();
 
-       if (ret < 0) {
-               fput(syncobj->file);
-               return ret;
-       }
-       *handle = ret;
-       return 0;
+       if (ret > 0) {
+               *handle = ret;
+               ret = 0;
+       } else
+               drm_syncobj_put(syncobj);
+
+       fput(file);
+       return ret;
 }
 
 static int drm_syncobj_import_sync_file_fence(struct drm_file *file_private,
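
The rewrite drops the cached per-syncobj file and follows the standard fd export sequence: reserve an fd number first (cheap, and undoable with put_unused_fd()), create the file, and only call fd_install() once nothing else can fail, because an installed fd is visible to userspace and cannot be taken back. A hedged kernel-style sketch of that sequence, using the same calls as the hunk (the helper name is illustrative):

#include <linux/anon_inodes.h>
#include <linux/err.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>

static int example_export_fd(void *obj, const struct file_operations *fops)
{
	struct file *file;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);	/* reserve the number only */
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("example", fops, obj, 0);
	if (IS_ERR(file)) {
		put_unused_fd(fd);	/* fd never became visible: safe to undo */
		return PTR_ERR(file);
	}

	fd_install(fd, file);		/* point of no return */
	return fd;
}
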
index 85d4c57870fb7a2c577803d12e3c0bf219cf056f..49af94627c8a1e1f3446dad2ebb625075112ba07 100644 (file)
@@ -2777,12 +2777,12 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 }
 
 static struct cmd_info *find_cmd_entry_any_ring(struct intel_gvt *gvt,
-               unsigned int opcode, int rings)
+               unsigned int opcode, unsigned long rings)
 {
        struct cmd_info *info = NULL;
        unsigned int ring;
 
-       for_each_set_bit(ring, (unsigned long *)&rings, I915_NUM_ENGINES) {
+       for_each_set_bit(ring, &rings, I915_NUM_ENGINES) {
                info = find_cmd_entry(gvt, opcode, ring);
                if (info)
                        break;
index 355120865efd14873726e8eae2e1ec6d6fb31b9f..309f3fa6794a92554c3e7da74429f162090da098 100644 (file)
@@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
        /* Clear host CRT status, so guest couldn't detect this host CRT. */
        if (IS_BROADWELL(dev_priv))
                vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
+
+       vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
 }
 
 static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
@@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
 static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
                                    int type, unsigned int resolution)
 {
-       struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
 
        if (WARN_ON(resolution >= GVT_EDID_NUM))
@@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
        port->type = type;
 
        emulate_monitor_status_change(vgpu);
-       vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE;
+
        return 0;
 }
 
index 8e331142badbcbad4ceebbd0c0b2e2fa2fb8584c..64d67ff9bf084a3c02e3c2f808ed9af066208f97 100644 (file)
@@ -1359,12 +1359,15 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp,
                        return ret;
        } else {
                if (!test_bit(index, spt->post_shadow_bitmap)) {
+                       int type = spt->shadow_page.type;
+
                        ppgtt_get_shadow_entry(spt, &se, index);
                        ret = ppgtt_handle_guest_entry_removal(gpt, &se, index);
                        if (ret)
                                return ret;
+                       ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+                       ppgtt_set_shadow_entry(spt, &se, index);
                }
-
                ppgtt_set_post_shadow(spt, index);
        }
 
index 54b5d4c582b610b0164e151a1786b683ebc00618..e143004e66d59396796b16b36ffdcec122629058 100644 (file)
@@ -2368,6 +2368,9 @@ struct drm_i915_private {
         */
        struct workqueue_struct *wq;
 
+       /* ordered wq for modesets */
+       struct workqueue_struct *modeset_wq;
+
        /* Display functions */
        struct drm_i915_display_funcs display;
 
index ad4050f7ab3b6965db1ce1b8d150036354464078..5cfba89ed586391409cf02459344b668c9c181bc 100644 (file)
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
         * must wait for all rendering to complete to the object (as unbinding
         * must anyway), and retire the requests.
         */
-       ret = i915_gem_object_wait(obj,
-                                  I915_WAIT_INTERRUPTIBLE |
-                                  I915_WAIT_LOCKED |
-                                  I915_WAIT_ALL,
-                                  MAX_SCHEDULE_TIMEOUT,
-                                  NULL);
+       ret = i915_gem_object_set_to_cpu_domain(obj, false);
        if (ret)
                return ret;
 
-       i915_gem_retire_requests(to_i915(obj->base.dev));
-
        while ((vma = list_first_entry_or_null(&obj->vma_list,
                                               struct i915_vma,
                                               obj_link))) {
@@ -474,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio)
        struct drm_i915_gem_request *rq;
        struct intel_engine_cs *engine;
 
-       if (!dma_fence_is_i915(fence))
+       if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
                return;
 
        rq = to_request(fence);
index 3866c49bc390ffd361f6f4e0070dffbd9120bf59..7923dfd9963c6e40b0e7e84d8a9d9bf0cca87a1f 100644 (file)
@@ -6977,6 +6977,7 @@ enum {
 #define  RESET_PCH_HANDSHAKE_ENABLE    (1<<4)
 
 #define GEN8_CHICKEN_DCPR_1            _MMIO(0x46430)
+#define   SKL_SELECT_ALTERNATE_DC_EXIT (1<<30)
 #define   MASK_WAKEMEM                 (1<<13)
 
 #define SKL_DFSM                       _MMIO(0x51000)
@@ -7026,6 +7027,8 @@ enum {
 #define GEN9_SLICE_COMMON_ECO_CHICKEN0         _MMIO(0x7308)
 #define  DISABLE_PIXEL_MASK_CAMMING            (1<<14)
 
+#define GEN9_SLICE_COMMON_ECO_CHICKEN1         _MMIO(0x731c)
+
 #define GEN7_L3SQCREG1                         _MMIO(0xB010)
 #define  VLV_B0_WA_L3SQCREG1_VALUE             0x00D30000
 
@@ -8522,6 +8525,7 @@ enum skl_power_gate {
 #define  BXT_CDCLK_CD2X_DIV_SEL_2      (2<<22)
 #define  BXT_CDCLK_CD2X_DIV_SEL_4      (3<<22)
 #define  BXT_CDCLK_CD2X_PIPE(pipe)     ((pipe)<<20)
+#define  CDCLK_DIVMUX_CD_OVERRIDE      (1<<19)
 #define  BXT_CDCLK_CD2X_PIPE_NONE      BXT_CDCLK_CD2X_PIPE(3)
 #define  BXT_CDCLK_SSA_PRECHARGE_ENABLE        (1<<16)
 #define  CDCLK_FREQ_DECIMAL_MASK       (0x7ff)
index e8ca67a129d28da6ef5b9d1de9d8357f7dc02f7e..ac236b88c99ca0fb07c49ec5d9487b1e0b38adfe 100644 (file)
@@ -367,6 +367,7 @@ struct i915_sw_dma_fence_cb {
        struct dma_fence *dma;
        struct timer_list timer;
        struct irq_work work;
+       struct rcu_head rcu;
 };
 
 static void timer_i915_sw_fence_wake(struct timer_list *t)
@@ -406,7 +407,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
        del_timer_sync(&cb->timer);
        dma_fence_put(cb->dma);
 
-       kfree(cb);
+       kfree_rcu(cb, rcu);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
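
Switching kfree() to kfree_rcu() defers the free until after an RCU grace period, so a concurrent RCU reader that still holds a pointer to the callback cannot touch freed memory; the cost is the struct rcu_head added to the object above. A hedged kernel-style sketch of the conversion:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_cb {
	int data;
	struct rcu_head rcu;	/* storage used by the deferred free */
};

static void example_cb_release(struct example_cb *cb)
{
	/* Freed only after every pre-existing RCU reader has finished. */
	kfree_rcu(cb, rcu);
}
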
index 5f8b9f1f40f19e84968c18e5fbd229731b392dea..bcbc7abe66935eef46c9799451b939dc6d516639 100644 (file)
@@ -186,7 +186,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        struct intel_wait *wait, *n, *first;
 
        if (!b->irq_armed)
-               return;
+               goto wakeup_signaler;
 
        /* We only disarm the irq when we are idle (all requests completed),
         * so if the bottom-half remains asleep, it missed the request
@@ -208,6 +208,14 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
        b->waiters = RB_ROOT;
 
        spin_unlock_irq(&b->rb_lock);
+
+       /*
+        * The signaling thread may be asleep holding a reference to a request
+        * The signaling thread may be asleep holding a reference to a request
+        * that had its signaling cancelled prior to being preempted. We need
+        * to kick the signaler, just in case, to release any such reference.
+        */
+wakeup_signaler:
+       wake_up_process(b->signaler);
 }
 
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
@@ -651,23 +659,15 @@ static int intel_breadcrumbs_signaler(void *arg)
                }
 
                if (unlikely(do_schedule)) {
-                       DEFINE_WAIT(exec);
-
                        if (kthread_should_park())
                                kthread_parkme();
 
-                       if (kthread_should_stop()) {
-                               GEM_BUG_ON(request);
+                       if (unlikely(kthread_should_stop())) {
+                               i915_gem_request_put(request);
                                break;
                        }
 
-                       if (request)
-                               add_wait_queue(&request->execute, &exec);
-
                        schedule();
-
-                       if (request)
-                               remove_wait_queue(&request->execute, &exec);
                }
                i915_gem_request_put(request);
        } while (1);
index b2a6d62b71c049d27bd6664aab8083c80ae92265..60cf4e58389ae601faf40cd9c68055b43eb39494 100644 (file)
@@ -860,16 +860,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv,
 
 static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
 {
-       int min_cdclk = skl_calc_cdclk(0, vco);
        u32 val;
 
        WARN_ON(vco != 8100000 && vco != 8640000);
 
-       /* select the minimum CDCLK before enabling DPLL 0 */
-       val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk);
-       I915_WRITE(CDCLK_CTL, val);
-       POSTING_READ(CDCLK_CTL);
-
        /*
         * We always enable DPLL0 with the lowest link rate possible, but still
         * taking into account the VCO required to operate the eDP panel at the
@@ -923,7 +917,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
 {
        int cdclk = cdclk_state->cdclk;
        int vco = cdclk_state->vco;
-       u32 freq_select, pcu_ack;
+       u32 freq_select, pcu_ack, cdclk_ctl;
        int ret;
 
        WARN_ON((cdclk == 24000) != (vco == 0));
@@ -940,7 +934,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
                return;
        }
 
-       /* set CDCLK_CTL */
+       /* Choose frequency for this cdclk */
        switch (cdclk) {
        case 450000:
        case 432000:
@@ -968,10 +962,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
            dev_priv->cdclk.hw.vco != vco)
                skl_dpll0_disable(dev_priv);
 
+       cdclk_ctl = I915_READ(CDCLK_CTL);
+
+       if (dev_priv->cdclk.hw.vco != vco) {
+               /* Wa Display #1183: skl,kbl,cfl */
+               cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+               cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+               I915_WRITE(CDCLK_CTL, cdclk_ctl);
+       }
+
+       /* Wa Display #1183: skl,kbl,cfl */
+       cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE;
+       I915_WRITE(CDCLK_CTL, cdclk_ctl);
+       POSTING_READ(CDCLK_CTL);
+
        if (dev_priv->cdclk.hw.vco != vco)
                skl_dpll0_enable(dev_priv, vco);
 
-       I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk));
+       /* Wa Display #1183: skl,kbl,cfl */
+       cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+       I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+       cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+       I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+       /* Wa Display #1183: skl,kbl,cfl */
+       cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE;
+       I915_WRITE(CDCLK_CTL, cdclk_ctl);
        POSTING_READ(CDCLK_CTL);
 
        /* inform PCU of the change */
index e0843bb991699d0c81ec8242d1809a3e26527af8..58a3755544b292dfdd3c7e089f9179c51861d76d 100644 (file)
@@ -2128,6 +2128,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        if (WARN_ON(!pll))
                return;
 
+       mutex_lock(&dev_priv->dpll_lock);
+
        if (IS_CANNONLAKE(dev_priv)) {
                /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */
                val = I915_READ(DPCLKA_CFGCR0);
@@ -2157,6 +2159,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder,
        } else if (INTEL_INFO(dev_priv)->gen < 9) {
                I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll));
        }
+
+       mutex_unlock(&dev_priv->dpll_lock);
 }
 
 static void intel_ddi_clk_disable(struct intel_encoder *encoder)
index e8ccf89cb17b6843b878ec0ef0641055d68e1cf8..123585eeb87db849d84e78b0892e10f962b14d9b 100644 (file)
@@ -9944,11 +9944,10 @@ found:
        }
 
        ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0);
+       drm_framebuffer_put(fb);
        if (ret)
                goto fail;
 
-       drm_framebuffer_put(fb);
-
        ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode);
        if (ret)
                goto fail;
@@ -12545,11 +12544,15 @@ static int intel_atomic_commit(struct drm_device *dev,
        INIT_WORK(&state->commit_work, intel_atomic_commit_work);
 
        i915_sw_fence_commit(&intel_state->commit_ready);
-       if (nonblock)
+       if (nonblock && intel_state->modeset) {
+               queue_work(dev_priv->modeset_wq, &state->commit_work);
+       } else if (nonblock) {
                queue_work(system_unbound_wq, &state->commit_work);
-       else
+       } else {
+               if (intel_state->modeset)
+                       flush_workqueue(dev_priv->modeset_wq);
                intel_atomic_commit_tail(state);
-
+       }
 
        return 0;
 }
@@ -13195,7 +13198,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
        primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe);
        primary->check_plane = intel_check_primary_plane;
 
-       if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) {
+       if (INTEL_GEN(dev_priv) >= 10) {
                intel_primary_formats = skl_primary_formats;
                num_formats = ARRAY_SIZE(skl_primary_formats);
                modifiers = skl_format_modifiers_ccs;
@@ -14463,6 +14466,8 @@ int intel_modeset_init(struct drm_device *dev)
        enum pipe pipe;
        struct intel_crtc *crtc;
 
+       dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0);
+
        drm_mode_config_init(dev);
 
        dev->mode_config.min_width = 0;
@@ -15271,6 +15276,8 @@ void intel_modeset_cleanup(struct drm_device *dev)
        intel_cleanup_gt_powersave(dev_priv);
 
        intel_teardown_gmbus(dev_priv);
+
+       destroy_workqueue(dev_priv->modeset_wq);
 }
 
 void intel_connector_attach_encoder(struct intel_connector *connector,
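
The new modeset_wq is an ordered workqueue, so nonblocking modeset commits execute strictly one at a time in submission order, while non-modeset commits keep using system_unbound_wq and blocking modesets flush the queue before committing inline. A hedged sketch of that dispatch pattern (names are illustrative):

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_modeset_wq;

static int example_init(void)
{
	/* Ordered workqueues run items one at a time, in queue order. */
	example_modeset_wq = alloc_ordered_workqueue("example_modeset", 0);
	return example_modeset_wq ? 0 : -ENOMEM;
}

static void example_queue_commit(struct work_struct *work, bool is_modeset)
{
	if (is_modeset)
		queue_work(example_modeset_wq, work);	/* serialized */
	else
		queue_work(system_unbound_wq, work);	/* may run concurrently */
}
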
index ab5bf4e2e28e21f3daa7072a37cb6895aba8233c..6074e04dc99fca3d269d78ac2aa4e9159e78aedb 100644 (file)
@@ -1390,6 +1390,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine)
        if (ret)
                return ret;
 
+       /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
+       ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+       if (ret)
+               return ret;
+
        /* WaToEnableHwFixForPushConstHWBug:glk */
        WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
                          GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
index 3bf65288ffffd51719d0c4e8ce934ccd2d2f59e3..5809b29044fc573401f6116bb45762fc07e71596 100644 (file)
@@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
                };
 
                if (!pci_dev_present(atom_hdaudio_ids)) {
-                       DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n");
+                       DRM_INFO("HDaudio controller not detected, using LPE audio instead\n");
                        lpe_present = true;
                }
        }
index d36e2560743545cef7b32b24adb794e719ca5da2..e71a8cd50498c338aa0e0f3c03c2b62e0e8823b8 100644 (file)
@@ -974,6 +974,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);
 
+       if (i915_gem_request_completed(request))
+               return;
+
        if (prio <= READ_ONCE(request->priotree.priority))
                return;
 
index 6e3b430fccdc7291426c2323be514908f1c32cf7..55ea5eb3b7df1d8b2a0b8a423d3e492904c33df3 100644 (file)
@@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp,
        struct drm_i915_private *dev_priv = to_i915(dev);
 
        if (dev_priv->psr.active) {
-               i915_reg_t psr_ctl;
+               i915_reg_t psr_status;
                u32 psr_status_mask;
 
                if (dev_priv->psr.aux_frame_sync)
@@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp,
                                        0);
 
                if (dev_priv->psr.psr2_support) {
-                       psr_ctl = EDP_PSR2_CTL;
+                       psr_status = EDP_PSR2_STATUS_CTL;
                        psr_status_mask = EDP_PSR2_STATUS_STATE_MASK;
 
-                       I915_WRITE(psr_ctl,
-                                  I915_READ(psr_ctl) &
+                       I915_WRITE(EDP_PSR2_CTL,
+                                  I915_READ(EDP_PSR2_CTL) &
                                   ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE));
 
                } else {
-                       psr_ctl = EDP_PSR_STATUS_CTL;
+                       psr_status = EDP_PSR_STATUS_CTL;
                        psr_status_mask = EDP_PSR_STATUS_STATE_MASK;
 
-                       I915_WRITE(psr_ctl,
-                                  I915_READ(psr_ctl) & ~EDP_PSR_ENABLE);
+                       I915_WRITE(EDP_PSR_CTL,
+                                  I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
                }
 
                /* Wait till PSR is idle */
                if (intel_wait_for_register(dev_priv,
-                                           psr_ctl, psr_status_mask, 0,
+                                           psr_status, psr_status_mask, 0,
                                            2000))
                        DRM_ERROR("Timed out waiting for PSR Idle State\n");
 
index 8af286c63d3b6e9bd8e55b78f6de2fd814eaaf06..7e115f3927f65ff489fae8ccd496be67fdc814f2 100644 (file)
@@ -598,6 +598,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv)
 
        DRM_DEBUG_KMS("Enabling DC5\n");
 
+       /* Wa Display #1183: skl,kbl,cfl */
+       if (IS_GEN9_BC(dev_priv))
+               I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+                          SKL_SELECT_ALTERNATE_DC_EXIT);
+
        gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
 }
 
@@ -625,6 +630,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv)
 {
        DRM_DEBUG_KMS("Disabling DC6\n");
 
+       /* Wa Display #1183: skl,kbl,cfl */
+       if (IS_GEN9_BC(dev_priv))
+               I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+                          SKL_SELECT_ALTERNATE_DC_EXIT);
+
        gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
 }
 
@@ -1786,6 +1796,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
        GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |         \
        BIT_ULL(POWER_DOMAIN_MODESET) |                 \
        BIT_ULL(POWER_DOMAIN_AUX_A) |                   \
+       BIT_ULL(POWER_DOMAIN_GMBUS) |                   \
        BIT_ULL(POWER_DOMAIN_INIT))
 
 #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS (                \
index 2615912430cc97098f0fe806e95e5e40c1ee96f7..435ff8662cfa823a56f5d84a8fe66d4bc8929230 100644 (file)
@@ -224,7 +224,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                /* Determine if we can get a cache-coherent map, forcing
                 * uncached mapping if we can't.
                 */
-               if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
+               if (!nouveau_drm_use_coherent_gpu_mapping(drm))
                        nvbo->force_coherent = true;
        }
 
@@ -262,7 +262,8 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
                if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
                    (flags & TTM_PL_FLAG_VRAM) && !vmm->page[i].vram)
                        continue;
-               if ((flags & TTM_PL_FLAG_TT  ) && !vmm->page[i].host)
+               if ((flags & TTM_PL_FLAG_TT) &&
+                   (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
                        continue;
 
                /* Select this page size if it's the first that supports
index 8d4a5be3b913016c410c8226ad5b05b0bf75299b..56fe261b62683a8690ac8f3439426ca1e4c269ef 100644 (file)
@@ -152,9 +152,9 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
        work->cli = cli;
        mutex_lock(&cli->lock);
        list_add_tail(&work->head, &cli->worker);
-       mutex_unlock(&cli->lock);
        if (dma_fence_add_callback(fence, &work->cb, nouveau_cli_work_fence))
                nouveau_cli_work_fence(fence, &work->cb);
+       mutex_unlock(&cli->lock);
 }
 
 static void
index 3331e82ae9e7130b18f4a6f307cc284519f873d3..96f6bd8aee5d3a248d76c683b6146ebb8ef673c7 100644 (file)
@@ -157,8 +157,8 @@ struct nouveau_drm {
                struct nvif_object copy;
                int mtrr;
                int type_vram;
-               int type_host;
-               int type_ncoh;
+               int type_host[2];
+               int type_ncoh[2];
        } ttm;
 
        /* GEM interface support */
@@ -217,6 +217,13 @@ nouveau_drm(struct drm_device *dev)
        return dev->dev_private;
 }
 
+static inline bool
+nouveau_drm_use_coherent_gpu_mapping(struct nouveau_drm *drm)
+{
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       return !(mmu->type[drm->ttm.type_host[0]].type & NVIF_MEM_UNCACHED);
+}
+
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 bool nouveau_pmops_runtime(void);
index c533d8e04afc0f1fc4708d85e069323291c428c3..be7357bf2246e6ae326c9b6750c2c183cb0974d9 100644 (file)
@@ -429,7 +429,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
        drm_fb_helper_unregister_fbi(&fbcon->helper);
        drm_fb_helper_fini(&fbcon->helper);
 
-       if (nouveau_fb->nvbo) {
+       if (nouveau_fb && nouveau_fb->nvbo) {
                nouveau_vma_del(&nouveau_fb->vma);
                nouveau_bo_unmap(nouveau_fb->nvbo);
                nouveau_bo_unpin(nouveau_fb->nvbo);
index 589a9621db763f98454485081a3f80e2324e717c..c002f896850739b343624247e7d52d94e34bf99d 100644 (file)
@@ -103,10 +103,10 @@ nouveau_mem_host(struct ttm_mem_reg *reg, struct ttm_dma_tt *tt)
        u8 type;
        int ret;
 
-       if (mmu->type[drm->ttm.type_host].type & NVIF_MEM_UNCACHED)
-               type = drm->ttm.type_ncoh;
+       if (!nouveau_drm_use_coherent_gpu_mapping(drm))
+               type = drm->ttm.type_ncoh[!!mem->kind];
        else
-               type = drm->ttm.type_host;
+               type = drm->ttm.type_host[0];
 
        if (mem->kind && !(mmu->type[type].type & NVIF_MEM_KIND))
                mem->comp = mem->kind = 0;
index 08b974b3048279813e2d67ad0d5b0055e68998c2..dff51a0ee0281e8f5924ffc0135d8b4baf8542f9 100644 (file)
@@ -235,27 +235,46 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
        drm->ttm.mem_global_ref.release = NULL;
 }
 
-int
-nouveau_ttm_init(struct nouveau_drm *drm)
+static int
+nouveau_ttm_init_host(struct nouveau_drm *drm, u8 kind)
 {
-       struct nvkm_device *device = nvxx_device(&drm->client.device);
-       struct nvkm_pci *pci = device->pci;
        struct nvif_mmu *mmu = &drm->client.mmu;
-       struct drm_device *dev = drm->dev;
-       int typei, ret;
+       int typei;
 
        typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE |
-                                                  NVIF_MEM_COHERENT);
+                                           kind | NVIF_MEM_COHERENT);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_host = typei;
+       drm->ttm.type_host[!!kind] = typei;
 
-       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE);
+       typei = nvif_mmu_type(mmu, NVIF_MEM_HOST | NVIF_MEM_MAPPABLE | kind);
        if (typei < 0)
                return -ENOSYS;
 
-       drm->ttm.type_ncoh = typei;
+       drm->ttm.type_ncoh[!!kind] = typei;
+       return 0;
+}
+
+int
+nouveau_ttm_init(struct nouveau_drm *drm)
+{
+       struct nvkm_device *device = nvxx_device(&drm->client.device);
+       struct nvkm_pci *pci = device->pci;
+       struct nvif_mmu *mmu = &drm->client.mmu;
+       struct drm_device *dev = drm->dev;
+       int typei, ret;
+
+       ret = nouveau_ttm_init_host(drm, 0);
+       if (ret)
+               return ret;
+
+       if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+           drm->client.device.info.chipset != 0x50) {
+               ret = nouveau_ttm_init_host(drm, NVIF_MEM_KIND);
+               if (ret)
+                       return ret;
+       }
 
        if (drm->client.device.info.platform != NV_DEVICE_INFO_V0_SOC &&
            drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
index 9e2628dd8e4d6734c2d7c5d073012bbb95b4fa4c..f5371d96b003c23cac9e1f34cf3deca3b54b06a6 100644 (file)
@@ -67,8 +67,8 @@ nouveau_vma_del(struct nouveau_vma **pvma)
                        nvif_vmm_put(&vma->vmm->vmm, &tmp);
                }
                list_del(&vma->head);
-               *pvma = NULL;
                kfree(*pvma);
+               *pvma = NULL;
        }
 }
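
The one-line reorder above fixes a leak: the old code cleared *pvma before kfree(*pvma), so it freed NULL and leaked the vma. The rule is to free through the pointer first and only then poison it, as in this userspace sketch:

#include <stdlib.h>

struct vma { int dummy; };

static void vma_del(struct vma **pvma)
{
	/* BAD:  *pvma = NULL; free(*pvma);  -- frees NULL, leaks the vma */
	free(*pvma);	/* release through the pointer first... */
	*pvma = NULL;	/* ...then poison the caller's copy */
}

int main(void)
{
	struct vma *vma = malloc(sizeof(*vma));

	vma_del(&vma);	/* vma is NULL afterwards, memory is released */
	return 0;
}
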
 
index e146436156985a534fa14e0829db4560d6eb1459..00eeaaffeae565a04044fc55e52990eb71d1063b 100644 (file)
@@ -2369,7 +2369,7 @@ nv13b_chipset = {
        .imem = gk20a_instmem_new,
        .ltc = gp100_ltc_new,
        .mc = gp10b_mc_new,
-       .mmu = gf100_mmu_new,
+       .mmu = gp10b_mmu_new,
        .secboot = gp10b_secboot_new,
        .pmu = gm20b_pmu_new,
        .timer = gk20a_timer_new,
index a2978a37b4f3c72ef4e6f03b1fb7f2c0c7500478..700fc754f28a4c32a001c243822b5e97733ba0a7 100644 (file)
@@ -174,6 +174,7 @@ gf119_sor = {
                .links = gf119_sor_dp_links,
                .power = g94_sor_dp_power,
                .pattern = gf119_sor_dp_pattern,
+               .drive = gf119_sor_dp_drive,
                .vcpi = gf119_sor_dp_vcpi,
                .audio = gf119_sor_dp_audio,
                .audio_sym = gf119_sor_dp_audio_sym,
index 972370ed36f090d0c0323253b79735edd355db07..7c7efa4ea0d0edb391a27db2c6e99179799070f1 100644 (file)
@@ -36,6 +36,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
                        if (data) {
                                *ver = nvbios_rd08(bios, data + 0x00);
                                switch (*ver) {
+                               case 0x20:
                                case 0x21:
                                case 0x30:
                                case 0x40:
@@ -63,6 +64,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
        if (data && idx < *cnt) {
                u16 outp = nvbios_rd16(bios, data + *hdr + idx * *len);
                switch (*ver * !!outp) {
+               case 0x20:
                case 0x21:
                case 0x30:
                        *hdr = nvbios_rd08(bios, data + 0x04);
@@ -96,12 +98,16 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
                info->type = nvbios_rd16(bios, data + 0x00);
                info->mask = nvbios_rd16(bios, data + 0x02);
                switch (*ver) {
+               case 0x20:
+                       info->mask |= 0x00c0; /* match any link */
+                       /* fall-through */
                case 0x21:
                case 0x30:
                        info->flags     = nvbios_rd08(bios, data + 0x05);
                        info->script[0] = nvbios_rd16(bios, data + 0x06);
                        info->script[1] = nvbios_rd16(bios, data + 0x08);
-                       info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
+                       if (*len >= 0x0c)
+                               info->lnkcmp    = nvbios_rd16(bios, data + 0x0a);
                        if (*len >= 0x0f) {
                                info->script[2] = nvbios_rd16(bios, data + 0x0c);
                                info->script[3] = nvbios_rd16(bios, data + 0x0e);
@@ -170,6 +176,7 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
        memset(info, 0x00, sizeof(*info));
        if (data) {
                switch (*ver) {
+               case 0x20:
                case 0x21:
                        info->dc    = nvbios_rd08(bios, data + 0x02);
                        info->pe    = nvbios_rd08(bios, data + 0x03);
index 1ba7289684aa2116b6fcc4d05869f0d2b8322a39..db48a1daca0c7a3d786332ce25435839fcc10760 100644 (file)
@@ -249,7 +249,7 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
-               refcount_inc(&iobj->maps);
+               refcount_set(&iobj->maps, 1);
        }
 
        mutex_unlock(&imem->subdev.mutex);
index b1b1f3626b96298fcdb76f1819fd7b97801d5b37..deb96de54b0030244ec88014bce526119c3fae91 100644 (file)
@@ -136,6 +136,13 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
                return ret;
 
        pci->irq = pdev->irq;
+
+       /* Ensure MSI interrupts are armed for the case where there are
+        * already interrupts pending (for whatever reason) at load time.
+        */
+       if (pci->msi)
+               pci->func->msi_rearm(pci);
+
        return ret;
 }
 
index e626eddf24d5e2231c2434a76d45ddd29067c6e4..23db74ae18263a92866467e36bf727bfc5c925b5 100644 (file)
@@ -78,6 +78,8 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
 
                        /* then read the message */
                        msg.len = cnt & 0xf;
+                       if (msg.len > CEC_MAX_MSG_SIZE - 2)
+                               msg.len = CEC_MAX_MSG_SIZE - 2;
                        msg.msg[0] = hdmi_read_reg(core->base,
                                                   HDMI_CEC_RX_CMD_HEADER);
                        msg.msg[1] = hdmi_read_reg(core->base,
@@ -104,26 +106,6 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core)
        }
 }
 
-static void hdmi_cec_transmit_fifo_empty(struct hdmi_core_data *core, u32 stat1)
-{
-       if (stat1 & 2) {
-               u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
-
-               cec_transmit_done(core->adap,
-                                 CEC_TX_STATUS_NACK |
-                                 CEC_TX_STATUS_MAX_RETRIES,
-                                 0, (dbg3 >> 4) & 7, 0, 0);
-       } else if (stat1 & 1) {
-               cec_transmit_done(core->adap,
-                                 CEC_TX_STATUS_ARB_LOST |
-                                 CEC_TX_STATUS_MAX_RETRIES,
-                                 0, 0, 0, 0);
-       } else if (stat1 == 0) {
-               cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
-                                 0, 0, 0, 0);
-       }
-}
-
 void hdmi4_cec_irq(struct hdmi_core_data *core)
 {
        u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0);
@@ -132,27 +114,21 @@ void hdmi4_cec_irq(struct hdmi_core_data *core)
        hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_0, stat0);
        hdmi_write_reg(core->base, HDMI_CEC_INT_STATUS_1, stat1);
 
-       if (stat0 & 0x40)
+       if (stat0 & 0x20) {
+               cec_transmit_done(core->adap, CEC_TX_STATUS_OK,
+                                 0, 0, 0, 0);
                REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
-       else if (stat0 & 0x24)
-               hdmi_cec_transmit_fifo_empty(core, stat1);
-       if (stat1 & 2) {
+       } else if (stat1 & 0x02) {
                u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3);
 
                cec_transmit_done(core->adap,
                                  CEC_TX_STATUS_NACK |
                                  CEC_TX_STATUS_MAX_RETRIES,
                                  0, (dbg3 >> 4) & 7, 0, 0);
-       } else if (stat1 & 1) {
-               cec_transmit_done(core->adap,
-                                 CEC_TX_STATUS_ARB_LOST |
-                                 CEC_TX_STATUS_MAX_RETRIES,
-                                 0, 0, 0, 0);
+               REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
        }
        if (stat0 & 0x02)
                hdmi_cec_received_msg(core);
-       if (stat1 & 0x3)
-               REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7);
 }
 
 static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap)
@@ -231,18 +207,14 @@ static int hdmi_cec_adap_enable(struct cec_adapter *adap, bool enable)
        /*
         * Enable CEC interrupts:
         * Transmit Buffer Full/Empty Change event
-        * Transmitter FIFO Empty event
         * Receiver FIFO Not Empty event
         */
-       hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x26);
+       hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_0, 0x22);
        /*
         * Enable CEC interrupts:
-        * RX FIFO Overrun Error event
-        * Short Pulse Detected event
         * Frame Retransmit Count Exceeded event
-        * Start Bit Irregularity event
         */
-       hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x0f);
+       hdmi_write_reg(core->base, HDMI_CEC_INT_ENABLE_1, 0x02);
 
        /* cec calibration enable (self clearing) */
        hdmi_write_reg(core->base, HDMI_CEC_SETUP, 0x03);
index dda904ec0534cd9d84d3967b94bf5fa4f444df9e..500b6fb3e0284d2fdfc71265a64f0d5b51fe4f99 100644 (file)
@@ -175,11 +175,31 @@ static void sun4i_hdmi_mode_set(struct drm_encoder *encoder,
        writel(val, hdmi->base + SUN4I_HDMI_VID_TIMING_POL_REG);
 }
 
+static enum drm_mode_status sun4i_hdmi_mode_valid(struct drm_encoder *encoder,
+                                       const struct drm_display_mode *mode)
+{
+       struct sun4i_hdmi *hdmi = drm_encoder_to_sun4i_hdmi(encoder);
+       unsigned long rate = mode->clock * 1000;
+       unsigned long diff = rate / 200; /* +-0.5% allowed by HDMI spec */
+       long rounded_rate;
+
+       /* 165 MHz is the typical max pixelclock frequency for HDMI <= 1.2 */
+       if (rate > 165000000)
+               return MODE_CLOCK_HIGH;
+       rounded_rate = clk_round_rate(hdmi->tmds_clk, rate);
+       if (rounded_rate > 0 &&
+           max_t(unsigned long, rounded_rate, rate) -
+           min_t(unsigned long, rounded_rate, rate) < diff)
+               return MODE_OK;
+       return MODE_NOCLOCK;
+}
+
 static const struct drm_encoder_helper_funcs sun4i_hdmi_helper_funcs = {
        .atomic_check   = sun4i_hdmi_atomic_check,
        .disable        = sun4i_hdmi_disable,
        .enable         = sun4i_hdmi_enable,
        .mode_set       = sun4i_hdmi_mode_set,
+       .mode_valid     = sun4i_hdmi_mode_valid,
 };
 
 static const struct drm_encoder_funcs sun4i_hdmi_funcs = {
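
sun4i_hdmi_mode_valid() accepts a mode only if the TMDS clock can be rounded to within +-0.5% of the requested pixel clock (rate / 200), and rejects anything above the 165 MHz limit outright. The tolerance test in isolation, with illustrative rates:

#include <stdbool.h>
#include <stdio.h>

static bool tmds_clock_ok(unsigned long want, long rounded)
{
	unsigned long diff = want / 200;	/* 0.5% of the target rate */
	unsigned long delta;

	if (rounded <= 0)
		return false;
	delta = rounded > (long)want ? rounded - want : want - rounded;
	return delta < diff;
}

int main(void)
{
	printf("%d\n", tmds_clock_ok(148500000, 148500000)); /* exact: 1 */
	printf("%d\n", tmds_clock_ok(148500000, 148000000)); /* ~0.34%: 1 */
	printf("%d\n", tmds_clock_ok(148500000, 146000000)); /* ~1.7%: 0 */
	return 0;
}
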
index e122f5b2a395583cc14302a9bc4166fbba671071..f4284b51bdca99a04e8eda109a4d67bf5c9fac74 100644 (file)
@@ -724,12 +724,12 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
        if (IS_ERR(tcon->crtc)) {
                dev_err(dev, "Couldn't create our CRTC\n");
                ret = PTR_ERR(tcon->crtc);
-               goto err_free_clocks;
+               goto err_free_dotclock;
        }
 
        ret = sun4i_rgb_init(drm, tcon);
        if (ret < 0)
-               goto err_free_clocks;
+               goto err_free_dotclock;
 
        if (tcon->quirks->needs_de_be_mux) {
                /*
index b0a1dedac8026e0ad89ee1227cb7b7881d3fee7d..476079f1255f6c51e5cba11ff6f05f1c315a691e 100644 (file)
@@ -2656,6 +2656,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
                                name, err);
                        goto remove;
                }
+       } else {
+               /* fall back to the module clock on SOR0 (eDP/LVDS only) */
+               sor->clk_out = sor->clk;
        }
 
        sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
index 44343a2bf55c65458a196b5968b0c494f1c569b0..5d252fb27a8228d6d92fe10f2db605a1a7a4a8a6 100644 (file)
@@ -455,6 +455,7 @@ ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
                freed += (nr_free_pool - shrink_pages) << pool->order;
                if (freed >= sc->nr_to_scan)
                        break;
+               shrink_pages <<= pool->order;
        }
        mutex_unlock(&lock);
        return freed;
@@ -543,7 +544,7 @@ static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags,
        int r = 0;
        unsigned i, j, cpages;
        unsigned npages = 1 << order;
-       unsigned max_cpages = min(count, (unsigned)NUM_PAGES_TO_ALLOC);
+       unsigned max_cpages = min(count << order, (unsigned)NUM_PAGES_TO_ALLOC);
 
        /* allocate array for page caching change */
        caching_array = kmalloc(max_cpages*sizeof(struct page *), GFP_KERNEL);
@@ -1006,6 +1007,8 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages)
        pr_info("Initializing pool allocator\n");
 
        _manager = kzalloc(sizeof(*_manager), GFP_KERNEL);
+       if (!_manager)
+               return -ENOMEM;
 
        ttm_page_pool_init_locked(&_manager->wc_pool, GFP_HIGHUSER, "wc", 0);
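
Two ttm fixes above: the shrink loop must convert its remaining count back into pages (shrink_pages <<= pool->order) because each pool entry covers 1 << order pages, and the _manager allocation finally gets a NULL check. The unit conversion in isolation, with illustrative values:

#include <stdio.h>

int main(void)
{
	unsigned int order = 2;			/* 4 pages per pool entry */
	unsigned long budget_pages = 100;	/* what the VM asked for */
	unsigned long blocks = budget_pages >> order;	/* pages -> blocks */
	unsigned long freed_pages = blocks << order;	/* blocks -> pages */

	printf("%lu-page budget: free %lu blocks = %lu pages\n",
	       budget_pages, blocks, freed_pages);
	return 0;
}
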
 
index 26eddbb628936b91f20a000c405bfbc536324e89..3dd62d75f5319dbffcd9d27ea6f60927d193eb92 100644 (file)
@@ -209,9 +209,6 @@ vc4_irq_postinstall(struct drm_device *dev)
 {
        struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-       /* Undo the effects of a previous vc4_irq_uninstall. */
-       enable_irq(dev->irq);
-
        /* Enable both the render done and out of memory interrupts. */
        V3D_WRITE(V3D_INTENA, V3D_DRIVER_IRQS);
 
index 622cd43840b8c53588396b6d1d82b5d234a69113..493f392b3a0a90e70820d3582c0847ebe3cc0bd7 100644 (file)
@@ -327,6 +327,9 @@ static int vc4_v3d_runtime_resume(struct device *dev)
                return ret;
 
        vc4_v3d_init_hw(vc4->dev);
+
+       /* We disabled the IRQ as part of vc4_irq_uninstall in suspend. */
+       enable_irq(vc4->dev->irq);
        vc4_irq_postinstall(vc4->dev);
 
        return 0;
index 21c62a34e5580af7e56505a64bbae48707ae2599..87e8af5776a389166278e24c6087a49f03682dbb 100644 (file)
@@ -2731,6 +2731,8 @@ static int vmw_cmd_dx_view_define(struct vmw_private *dev_priv,
        }
 
        view_type = vmw_view_cmd_to_type(header->id);
+       if (view_type == vmw_view_max)
+               return -EINVAL;
        cmd = container_of(header, typeof(*cmd), header);
        ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
                                user_surface_converter,
index 0545740b3724f1d6c28b9e1dd169351fdd723949..641294aef1652e1d8599016090aacd4bb33364b0 100644 (file)
@@ -697,7 +697,6 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane)
        vps->pinned = 0;
 
        /* Mapping is managed by prepare_fb/cleanup_fb */
-       memset(&vps->guest_map, 0, sizeof(vps->guest_map));
        memset(&vps->host_map, 0, sizeof(vps->host_map));
        vps->cpp = 0;
 
@@ -760,11 +759,6 @@ vmw_du_plane_destroy_state(struct drm_plane *plane,
 
 
        /* Should have been freed by cleanup_fb */
-       if (vps->guest_map.virtual) {
-               DRM_ERROR("Guest mapping not freed\n");
-               ttm_bo_kunmap(&vps->guest_map);
-       }
-
        if (vps->host_map.virtual) {
                DRM_ERROR("Host mapping not freed\n");
                ttm_bo_kunmap(&vps->host_map);
index ff9c8389ff21c3b2923c8387455976d5425e33f3..cd9da2dd79af1a062d4aaa3d6b6ded468bd4207e 100644 (file)
@@ -175,7 +175,7 @@ struct vmw_plane_state {
        int pinned;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map, guest_map;
+       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
index 90b5437fd787e0de7d360b5b697fb33d4a8551c3..b68d74888ab1100be82f8a2a9fdc3234a4e04293 100644 (file)
@@ -114,7 +114,7 @@ struct vmw_screen_target_display_unit {
        bool defined;
 
        /* For CPU Blit */
-       struct ttm_bo_kmap_obj host_map, guest_map;
+       struct ttm_bo_kmap_obj host_map;
        unsigned int cpp;
 };
 
@@ -695,7 +695,8 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        s32 src_pitch, dst_pitch;
        u8 *src, *dst;
        bool not_used;
-
+       struct ttm_bo_kmap_obj guest_map;
+       int ret;
 
        if (!dirty->num_hits)
                return;
@@ -706,6 +707,13 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        if (width == 0 || height == 0)
                return;
 
+       ret = ttm_bo_kmap(&ddirty->buf->base, 0, ddirty->buf->base.num_pages,
+                         &guest_map);
+       if (ret) {
+               DRM_ERROR("Failed mapping framebuffer for blit: %d\n",
+                         ret);
+               goto out_cleanup;
+       }
 
        /* Assume we are blitting from Host (display_srf) to Guest (dmabuf) */
        src_pitch = stdu->display_srf->base_size.width * stdu->cpp;
@@ -713,7 +721,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
        src += ddirty->top * src_pitch + ddirty->left * stdu->cpp;
 
        dst_pitch = ddirty->pitch;
-       dst = ttm_kmap_obj_virtual(&stdu->guest_map, &not_used);
+       dst = ttm_kmap_obj_virtual(&guest_map, &not_used);
        dst += ddirty->fb_top * dst_pitch + ddirty->fb_left * stdu->cpp;
 
 
@@ -772,6 +780,7 @@ static void vmw_stdu_dmabuf_cpu_commit(struct vmw_kms_dirty *dirty)
                vmw_fifo_commit(dev_priv, sizeof(*cmd));
        }
 
+       ttm_bo_kunmap(&guest_map);
 out_cleanup:
        ddirty->left = ddirty->top = ddirty->fb_left = ddirty->fb_top = S32_MAX;
        ddirty->right = ddirty->bottom = S32_MIN;
@@ -1109,9 +1118,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane,
 {
        struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state);
 
-       if (vps->guest_map.virtual)
-               ttm_bo_kunmap(&vps->guest_map);
-
        if (vps->host_map.virtual)
                ttm_bo_kunmap(&vps->host_map);
 
@@ -1277,33 +1283,11 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
         */
        if (vps->content_fb_type == SEPARATE_DMA &&
            !(dev_priv->capabilities & SVGA_CAP_3D)) {
-
-               struct vmw_framebuffer_dmabuf *new_vfbd;
-
-               new_vfbd = vmw_framebuffer_to_vfbd(new_fb);
-
-               ret = ttm_bo_reserve(&new_vfbd->buffer->base, false, false,
-                                    NULL);
-               if (ret)
-                       goto out_srf_unpin;
-
-               ret = ttm_bo_kmap(&new_vfbd->buffer->base, 0,
-                                 new_vfbd->buffer->base.num_pages,
-                                 &vps->guest_map);
-
-               ttm_bo_unreserve(&new_vfbd->buffer->base);
-
-               if (ret) {
-                       DRM_ERROR("Failed to map content buffer to CPU\n");
-                       goto out_srf_unpin;
-               }
-
                ret = ttm_bo_kmap(&vps->surf->res.backup->base, 0,
                                  vps->surf->res.backup->base.num_pages,
                                  &vps->host_map);
                if (ret) {
                        DRM_ERROR("Failed to map display buffer to CPU\n");
-                       ttm_bo_kunmap(&vps->guest_map);
                        goto out_srf_unpin;
                }
 
@@ -1350,7 +1334,6 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
        stdu->display_srf = vps->surf;
        stdu->content_fb_type = vps->content_fb_type;
        stdu->cpp = vps->cpp;
-       memcpy(&stdu->guest_map, &vps->guest_map, sizeof(vps->guest_map));
        memcpy(&stdu->host_map, &vps->host_map, sizeof(vps->host_map));
 
        if (!stdu->defined)
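
Instead of keeping a long-lived guest mapping cached in the plane state, the blit path now maps the buffer just before the CPU copy and unmaps it right after, which removes the guest_map bookkeeping from prepare_fb/cleanup_fb entirely. A sketch of the short-lived-mapping shape (illustrative names, not the vmwgfx API):

#include <stddef.h>
#include <string.h>

struct buffer { unsigned char *backing; size_t size; };

static void *buf_map(struct buffer *b)   { return b->backing; }
static void  buf_unmap(struct buffer *b) { (void)b; }

static void blit_to_buffer(struct buffer *dst_buf, const void *src, size_t len)
{
	void *dst = buf_map(dst_buf);	/* mapping lives only for the copy */

	memcpy(dst, src, len);
	buf_unmap(dst_buf);		/* released before we return */
}
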
index f3fcb836a1f9edd0af5b1f31362987d22eff11b2..0c3f608131cff48fdbec637da27135d970534807 100644 (file)
@@ -551,7 +551,7 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item)
                ret = hid_add_field(parser, HID_FEATURE_REPORT, data);
                break;
        default:
-               hid_err(parser->device, "unknown main item tag 0x%x\n", item->tag);
+               hid_warn(parser->device, "unknown main item tag 0x%x\n", item->tag);
                ret = 0;
        }
 
index 68cdc962265b1051d7e399e0c6a181a4426673ed..271f31461da427d93459632b096c578d71f3ee44 100644 (file)
@@ -696,8 +696,16 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
                                              (u8 *)&word, 2);
                break;
        case I2C_SMBUS_I2C_BLOCK_DATA:
-               size = I2C_SMBUS_BLOCK_DATA;
-               /* fallthrough */
+               if (read_write == I2C_SMBUS_READ) {
+                       read_length = data->block[0];
+                       count = cp2112_write_read_req(buf, addr, read_length,
+                                                     command, NULL, 0);
+               } else {
+                       count = cp2112_write_req(buf, addr, command,
+                                                data->block + 1,
+                                                data->block[0]);
+               }
+               break;
        case I2C_SMBUS_BLOCK_DATA:
                if (I2C_SMBUS_READ == read_write) {
                        count = cp2112_write_read_req(buf, addr,
@@ -785,6 +793,9 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr,
        case I2C_SMBUS_WORD_DATA:
                data->word = le16_to_cpup((__le16 *)buf);
                break;
+       case I2C_SMBUS_I2C_BLOCK_DATA:
+               memcpy(data->block + 1, buf, read_length);
+               break;
        case I2C_SMBUS_BLOCK_DATA:
                if (read_length > I2C_SMBUS_BLOCK_MAX) {
                        ret = -EPROTO;
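
The dropped fallthrough above matters because the two transfer types frame data differently: for I2C_SMBUS_I2C_BLOCK_DATA the length lives only in data->block[0] on the host side and no length byte travels on the bus, whereas I2C_SMBUS_BLOCK_DATA sends and expects one. A minimal client-side sketch of the semantics this restores (illustrative; client, command and the error handling are assumptions, not part of this patch):

	/* Illustrative: an I2C block read through the SMBus API. The
	 * adapter (e.g. this cp2112 xfer hook) sees the request with
	 * data->block[0] = length; only payload bytes hit the wire.
	 */
	u8 buf[I2C_SMBUS_BLOCK_MAX];
	int n;

	n = i2c_smbus_read_i2c_block_data(client, command, sizeof(buf), buf);
	if (n < 0)
		return n;	/* n >= 0 is the number of bytes read */
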
index 9325545fc3ae1cac4e3919c1120af6841590d4f4..edc0f64bb584806f080c1b6f682c53a20acc8466 100644 (file)
 
 #ifdef CONFIG_HOLTEK_FF
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
-MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
-
 /*
  * These commands and parameters are currently known:
  *
@@ -223,3 +219,7 @@ static struct hid_driver holtek_driver = {
        .probe = holtek_probe,
 };
 module_hid_driver(holtek_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anssi Hannula <anssi.hannula@iki.fi>");
+MODULE_DESCRIPTION("Force feedback support for Holtek On Line Grip based devices");
index 76ed9a216f101e90ddaa3bb38a1e0e7175ea81d4..610223f0e94530635ac94ccf40b9f582c57d6d62 100644 (file)
@@ -1378,6 +1378,8 @@ void vmbus_device_unregister(struct hv_device *device_obj)
        pr_debug("child device %s unregistered\n",
                dev_name(&device_obj->device));
 
+       kset_unregister(device_obj->channels_kset);
+
        /*
         * Kick off the process of unregistering the device.
         * This will call vmbus_remove() and eventually vmbus_device_release()
index c9790e2c344016e91c26d02f4a69e6a15035b62a..af5123042990281c694311da2819107dd8336afb 100644 (file)
@@ -143,6 +143,7 @@ static int hwmon_thermal_add_sensor(struct device *dev,
                                    struct hwmon_device *hwdev, int index)
 {
        struct hwmon_thermal_data *tdata;
+       struct thermal_zone_device *tzd;
 
        tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
        if (!tdata)
@@ -151,8 +152,14 @@ static int hwmon_thermal_add_sensor(struct device *dev,
        tdata->hwdev = hwdev;
        tdata->index = index;
 
-       devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
-                                            &hwmon_thermal_ops);
+       tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
+                                                  &hwmon_thermal_ops);
+       /*
+        * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV,
+        * so ignore that error but forward any other error.
+        */
+       if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV))
+               return PTR_ERR(tzd);
 
        return 0;
 }
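
The -ENODEV special case above is the usual pattern for an optional subsystem that may be compiled out: treat its absence as success, propagate real failures. A generic sketch (illustrative; dev, data and ops stand in for a caller's own objects):

	struct thermal_zone_device *tzd;

	tzd = devm_thermal_zone_of_sensor_register(dev, 0, data, &ops);
	if (IS_ERR(tzd) && PTR_ERR(tzd) != -ENODEV)
		return PTR_ERR(tzd);	/* a real registration failure */
	/* -ENODEV: CONFIG_THERMAL_OF is off, carry on without a zone */
	return 0;
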
@@ -621,14 +628,20 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
                                if (!chip->ops->is_visible(drvdata, hwmon_temp,
                                                           hwmon_temp_input, j))
                                        continue;
-                               if (info[i]->config[j] & HWMON_T_INPUT)
-                                       hwmon_thermal_add_sensor(dev, hwdev, j);
+                               if (info[i]->config[j] & HWMON_T_INPUT) {
+                                       err = hwmon_thermal_add_sensor(dev,
+                                                               hwdev, j);
+                                       if (err)
+                                               goto free_device;
+                               }
                        }
                }
        }
 
        return hdev;
 
+free_device:
+       device_unregister(hdev);
 free_hwmon:
        kfree(hwdev);
 ida_remove:
index a1d687a664f85edc4eeb1bf50360cb02c65faa45..66f0268f37a6ca143f558a6c02080c1b702afcc3 100644 (file)
@@ -314,7 +314,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 }
 #endif
 
-struct ib_device *__ib_device_get_by_index(u32 ifindex);
+struct ib_device *ib_device_get_by_index(u32 ifindex);
 /* RDMA device netlink */
 void nldev_init(void);
 void nldev_exit(void);
index 30914f3baa5f1ee5b43695c96f56c54b8ce2a2df..465520627e4b6b93c85aae4c276be44607eb7400 100644 (file)
@@ -134,7 +134,7 @@ static int ib_device_check_mandatory(struct ib_device *device)
        return 0;
 }
 
-struct ib_device *__ib_device_get_by_index(u32 index)
+static struct ib_device *__ib_device_get_by_index(u32 index)
 {
        struct ib_device *device;
 
@@ -145,6 +145,22 @@ struct ib_device *__ib_device_get_by_index(u32 index)
        return NULL;
 }
 
+/*
+ * The caller is responsible for dropping the reference taken here by calling put_device()
+ */
+struct ib_device *ib_device_get_by_index(u32 index)
+{
+       struct ib_device *device;
+
+       down_read(&lists_rwsem);
+       device = __ib_device_get_by_index(index);
+       if (device)
+               get_device(&device->dev);
+
+       up_read(&lists_rwsem);
+       return device;
+}
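
Per the comment above, every successful lookup must be balanced with a put_device(). A minimal caller sketch (illustrative; the authoritative users are the nldev.c hunks below):

	struct ib_device *device;

	device = ib_device_get_by_index(index);	/* takes a reference */
	if (!device)
		return -EINVAL;
	/* ... use device ... */
	put_device(&device->dev);		/* drop the reference */
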
+
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
        struct ib_device *device;
index 9a05245a1acf4a7fd010fb7f038e84d7bbf5003e..0dcd1aa6f683e8bc73901ecb4fee08c8a167801e 100644 (file)
@@ -142,27 +142,34 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
 
-       device = __ib_device_get_by_index(index);
+       device = ib_device_get_by_index(index);
        if (!device)
                return -EINVAL;
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
+       if (!msg) {
+               err = -ENOMEM;
+               goto err;
+       }
 
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);
 
        err = fill_dev_info(msg, device);
-       if (err) {
-               nlmsg_free(msg);
-               return err;
-       }
+       if (err)
+               goto err_free;
 
        nlmsg_end(msg, nlh);
 
+       put_device(&device->dev);
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+       nlmsg_free(msg);
+err:
+       put_device(&device->dev);
+       return err;
 }
 
 static int _nldev_get_dumpit(struct ib_device *device,
@@ -220,31 +227,40 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                return -EINVAL;
 
        index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-       device = __ib_device_get_by_index(index);
+       device = ib_device_get_by_index(index);
        if (!device)
                return -EINVAL;
 
        port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
-       if (!rdma_is_port_valid(device, port))
-               return -EINVAL;
+       if (!rdma_is_port_valid(device, port)) {
+               err = -EINVAL;
+               goto err;
+       }
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-       if (!msg)
-               return -ENOMEM;
+       if (!msg) {
+               err = -ENOMEM;
+               goto err;
+       }
 
        nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);
 
        err = fill_port_info(msg, device, port);
-       if (err) {
-               nlmsg_free(msg);
-               return err;
-       }
+       if (err)
+               goto err_free;
 
        nlmsg_end(msg, nlh);
+       put_device(&device->dev);
 
        return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+
+err_free:
+       nlmsg_free(msg);
+err:
+       put_device(&device->dev);
+       return err;
 }
 
 static int nldev_port_get_dumpit(struct sk_buff *skb,
@@ -265,7 +281,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
                return -EINVAL;
 
        ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
-       device = __ib_device_get_by_index(ifindex);
+       device = ib_device_get_by_index(ifindex);
        if (!device)
                return -EINVAL;
 
@@ -299,7 +315,9 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
                nlmsg_end(skb, nlh);
        }
 
-out:   cb->args[0] = idx;
+out:
+       put_device(&device->dev);
+       cb->args[0] = idx;
        return skb->len;
 }
 
index feafdb961c485c61e3842d6a946d83b1bf7176b8..59b2f96d986aa2b86491d29b8d7c3e25f244ba6f 100644 (file)
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
        if (ret)
                return ret;
 
+       if (!qp->qp_sec)
+               return 0;
+
        mutex_lock(&real_qp->qp_sec->mutex);
        ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
                                          qp->qp_sec);
index d0202bb176a4a6a826b27f2b4327691e334ad4ea..840b24096690ddcdef6ce894c5ee21b6285bf238 100644 (file)
@@ -2074,8 +2074,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
                return -EOPNOTSUPP;
 
        if (ucore->inlen > sizeof(cmd)) {
-               if (ib_is_udata_cleared(ucore, sizeof(cmd),
-                                       ucore->inlen - sizeof(cmd)))
+               if (!ib_is_udata_cleared(ucore, sizeof(cmd),
+                                        ucore->inlen - sizeof(cmd)))
                        return -EOPNOTSUPP;
        }
 
index 3fb8fb6cc824ef09f9c9c229e5a99a3e4801a65e..e36d27ed4daae3d46cb88b3ab4f747c34e6abb49 100644 (file)
@@ -1438,7 +1438,8 @@ int ib_close_qp(struct ib_qp *qp)
        spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
 
        atomic_dec(&real_qp->usecnt);
-       ib_close_shared_qp_security(qp->qp_sec);
+       if (qp->qp_sec)
+               ib_close_shared_qp_security(qp->qp_sec);
        kfree(qp);
 
        return 0;
index b7bfc536e00fd8c7b241c0f56539d1394d235acf..6f2b26126c64a4503b6a3bf8b8c3991b65b65012 100644 (file)
@@ -395,7 +395,7 @@ next_cqe:
 
 static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
 {
-       if (CQE_OPCODE(cqe) == C4IW_DRAIN_OPCODE) {
+       if (DRAIN_CQE(cqe)) {
                WARN_ONCE(1, "Unexpected DRAIN CQE qp id %u!\n", wq->sq.qid);
                return 0;
        }
@@ -494,7 +494,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
        /*
         * Special cqe for drain WR completions...
         */
-       if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
+       if (DRAIN_CQE(hw_cqe)) {
                *cookie = CQE_DRAIN_COOKIE(hw_cqe);
                *cqe = *hw_cqe;
                goto skip_cqe;
@@ -571,10 +571,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
                        ret = -EAGAIN;
                        goto skip_cqe;
                }
-               if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+               if (unlikely(!CQE_STATUS(hw_cqe) &&
+                            CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
                        t4_set_wq_in_error(wq);
-                       hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
-                       goto proc_cqe;
+                       hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
                }
                goto proc_cqe;
        }
@@ -748,9 +748,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                                c4iw_invalidate_mr(qhp->rhp,
                                                   CQE_WRID_FR_STAG(&cqe));
                        break;
-               case C4IW_DRAIN_OPCODE:
-                       wc->opcode = IB_WC_SEND;
-                       break;
                default:
                        pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
                               CQE_OPCODE(&cqe), CQE_QPID(&cqe));
index 470f97a79ebb7f90e649179ab34b30ad0c089df7..65dd3726ca024db4e0fabff5c4527c676757065e 100644 (file)
@@ -693,8 +693,6 @@ static inline int to_ib_qp_state(int c4iw_qp_state)
        return IB_QPS_ERR;
 }
 
-#define C4IW_DRAIN_OPCODE FW_RI_SGE_EC_CR_RETURN
-
 static inline u32 c4iw_ib_to_tpt_access(int a)
 {
        return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
index 38bddd02a9437470e0f3ed98a7e55afbc8cc7384..d5c92fc520d6471f5c1f41d77dd15b280a8a651b 100644 (file)
@@ -790,21 +790,57 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
        return 0;
 }
 
-static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int ib_to_fw_opcode(int ib_opcode)
+{
+       int opcode;
+
+       switch (ib_opcode) {
+       case IB_WR_SEND_WITH_INV:
+               opcode = FW_RI_SEND_WITH_INV;
+               break;
+       case IB_WR_SEND:
+               opcode = FW_RI_SEND;
+               break;
+       case IB_WR_RDMA_WRITE:
+               opcode = FW_RI_RDMA_WRITE;
+               break;
+       case IB_WR_RDMA_READ:
+       case IB_WR_RDMA_READ_WITH_INV:
+               opcode = FW_RI_READ_REQ;
+               break;
+       case IB_WR_REG_MR:
+               opcode = FW_RI_FAST_REGISTER;
+               break;
+       case IB_WR_LOCAL_INV:
+               opcode = FW_RI_LOCAL_INV;
+               break;
+       default:
+               opcode = -EINVAL;
+       }
+       return opcode;
+}
+
+static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
 {
        struct t4_cqe cqe = {};
        struct c4iw_cq *schp;
        unsigned long flag;
        struct t4_cq *cq;
+       int opcode;
 
        schp = to_c4iw_cq(qhp->ibqp.send_cq);
        cq = &schp->cq;
 
+       opcode = ib_to_fw_opcode(wr->opcode);
+       if (opcode < 0)
+               return opcode;
+
        cqe.u.drain_cookie = wr->wr_id;
        cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-                                CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+                                CQE_OPCODE_V(opcode) |
                                 CQE_TYPE_V(1) |
                                 CQE_SWCQE_V(1) |
+                                CQE_DRAIN_V(1) |
                                 CQE_QPID_V(qhp->wq.sq.qid));
 
        spin_lock_irqsave(&schp->lock, flag);
@@ -819,6 +855,23 @@ static void complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
                                           schp->ibcq.cq_context);
                spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
        }
+       return 0;
+}
+
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
+                               struct ib_send_wr **bad_wr)
+{
+       int ret = 0;
+
+       while (wr) {
+               ret = complete_sq_drain_wr(qhp, wr);
+               if (ret) {
+                       *bad_wr = wr;
+                       break;
+               }
+               wr = wr->next;
+       }
+       return ret;
 }
 
 static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
@@ -833,9 +886,10 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 
        cqe.u.drain_cookie = wr->wr_id;
        cqe.header = cpu_to_be32(CQE_STATUS_V(T4_ERR_SWFLUSH) |
-                                CQE_OPCODE_V(C4IW_DRAIN_OPCODE) |
+                                CQE_OPCODE_V(FW_RI_SEND) |
                                 CQE_TYPE_V(0) |
                                 CQE_SWCQE_V(1) |
+                                CQE_DRAIN_V(1) |
                                 CQE_QPID_V(qhp->wq.sq.qid));
 
        spin_lock_irqsave(&rchp->lock, flag);
@@ -852,6 +906,14 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
        }
 }
 
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+{
+       while (wr) {
+               complete_rq_drain_wr(qhp, wr);
+               wr = wr->next;
+       }
+}
+
 int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                   struct ib_send_wr **bad_wr)
 {
@@ -875,7 +937,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         */
        if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               complete_sq_drain_wr(qhp, wr);
+               err = complete_sq_drain_wrs(qhp, wr, bad_wr);
                return err;
        }
        num_wrs = t4_sq_avail(&qhp->wq);
@@ -1023,7 +1085,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         */
        if (qhp->wq.flushed) {
                spin_unlock_irqrestore(&qhp->lock, flag);
-               complete_rq_drain_wr(qhp, wr);
+               complete_rq_drain_wrs(qhp, wr);
                return err;
        }
        num_wrs = t4_rq_avail(&qhp->wq);
index e9ea94268d51545b07400ed9cba095d8461fb2a1..79e8ee12c391cf6d800911d4b2f0e16063c6de39 100644 (file)
@@ -197,6 +197,11 @@ struct t4_cqe {
 #define CQE_SWCQE_G(x)    ((((x) >> CQE_SWCQE_S)) & CQE_SWCQE_M)
 #define CQE_SWCQE_V(x)   ((x)<<CQE_SWCQE_S)
 
+#define CQE_DRAIN_S       10
+#define CQE_DRAIN_M       0x1
+#define CQE_DRAIN_G(x)    ((((x) >> CQE_DRAIN_S)) & CQE_DRAIN_M)
+#define CQE_DRAIN_V(x)   ((x)<<CQE_DRAIN_S)
+
 #define CQE_STATUS_S      5
 #define CQE_STATUS_M      0x1F
 #define CQE_STATUS_G(x)   ((((x) >> CQE_STATUS_S)) & CQE_STATUS_M)
@@ -213,6 +218,7 @@ struct t4_cqe {
 #define CQE_OPCODE_V(x)   ((x)<<CQE_OPCODE_S)
 
 #define SW_CQE(x)         (CQE_SWCQE_G(be32_to_cpu((x)->header)))
+#define DRAIN_CQE(x)      (CQE_DRAIN_G(be32_to_cpu((x)->header)))
 #define CQE_QPID(x)       (CQE_QPID_G(be32_to_cpu((x)->header)))
 #define CQE_TYPE(x)       (CQE_TYPE_G(be32_to_cpu((x)->header)))
 #define SQ_TYPE(x)       (CQE_TYPE((x)))
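
For orientation, these single-bit field macros compose into the big-endian CQE header the same way the qp.c hunks above build drain completions, and DRAIN_CQE() recovers the flag on the poll path. A condensed sketch (illustrative; qid and the handler are placeholders):

	struct t4_cqe cqe = {};

	/* Build: mark a software CQE as a drain completion. */
	cqe.header = cpu_to_be32(CQE_SWCQE_V(1) | CQE_DRAIN_V(1) |
				 CQE_OPCODE_V(FW_RI_SEND) |
				 CQE_QPID_V(qid));

	/* Poll: drain CQEs are now recognized by the flag, not the opcode. */
	if (DRAIN_CQE(&cqe))
		handle_drain(&cqe);	/* placeholder handler */
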
index 4a9b4d7efe6362f31457672882e882af3bfdfc46..8ce9118d4a7fbac52494de9e5b489dc44365706b 100644 (file)
@@ -1131,7 +1131,6 @@ struct hfi1_devdata {
        u16 pcie_lnkctl;
        u16 pcie_devctl2;
        u32 pci_msix0;
-       u32 pci_lnkctl3;
        u32 pci_tph2;
 
        /*
index 09e50fd2a08f07bf7b2d42d3d4b4a1a00644b2ce..8c7e7a60b71584d5e587f6ef664cc773ab991991 100644 (file)
@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
        if (ret)
                goto error;
 
-       ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                    dd->pci_lnkctl3);
-       if (ret)
-               goto error;
-
-       ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
-       if (ret)
-               goto error;
-
+       if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+               ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                                            dd->pci_tph2);
+               if (ret)
+                       goto error;
+       }
        return 0;
 
 error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
        if (ret)
                goto error;
 
-       ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                                   &dd->pci_lnkctl3);
-       if (ret)
-               goto error;
-
-       ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
-       if (ret)
-               goto error;
-
+       if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+               ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                                           &dd->pci_tph2);
+               if (ret)
+                       goto error;
+       }
        return 0;
 
 error:
index 313bfb9ccb71a3b42ebacbaa9b98b91daca51a19..4975f3e6596e49aaa8586ad9a13da719ef4dd6ab 100644 (file)
@@ -642,7 +642,6 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                goto err_free_mr;
 
        mr->max_pages = max_num_sg;
-
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_free_pl;
@@ -653,6 +652,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
        return &mr->ibmr;
 
 err_free_pl:
+       mr->ibmr.device = pd->device;
        mlx4_free_priv_pages(mr);
 err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
index 470995fa38d23ee9c5917f9986c53c582db8d542..6f6712f87a730de3cc8c05aeb73f50de73c07487 100644 (file)
@@ -47,17 +47,6 @@ int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
        return err;
 }
 
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-                               bool reset, void *out, int out_size)
-{
-       u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
-       MLX5_SET(query_cong_statistics_in, in, opcode,
-                MLX5_CMD_OP_QUERY_CONG_STATISTICS);
-       MLX5_SET(query_cong_statistics_in, in, clear, reset);
-       return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
                               void *out, int out_size)
 {
index af4c24596274deabd07ea0d95b719f4fd5d990e1..78ffded7cc2c59e2744d6279d96adc6e10cd1348 100644 (file)
@@ -37,8 +37,6 @@
 #include <linux/mlx5/driver.h>
 
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
-int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-                               bool reset, void *out, int out_size);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
                               void *out, int out_size);
 int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
index 543d0a4c8bf36eb43af6d9047c7eaa6cd75d9638..8ac50de2b2421ea66cb23eff88c09f8ecf3a0f94 100644 (file)
@@ -1463,6 +1463,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        }
 
        INIT_LIST_HEAD(&context->vma_private_list);
+       mutex_init(&context->vma_private_list_mutex);
        INIT_LIST_HEAD(&context->db_page_list);
        mutex_init(&context->db_page_mutex);
 
@@ -1624,7 +1625,9 @@ static void  mlx5_ib_vma_close(struct vm_area_struct *area)
         * mlx5_ib_disassociate_ucontext().
         */
        mlx5_ib_vma_priv_data->vma = NULL;
+       mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        list_del(&mlx5_ib_vma_priv_data->list);
+       mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
        kfree(mlx5_ib_vma_priv_data);
 }
 
@@ -1644,10 +1647,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
                return -ENOMEM;
 
        vma_prv->vma = vma;
+       vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
        vma->vm_private_data = vma_prv;
        vma->vm_ops =  &mlx5_ib_vm_ops;
 
+       mutex_lock(&ctx->vma_private_list_mutex);
        list_add(&vma_prv->list, vma_head);
+       mutex_unlock(&ctx->vma_private_list_mutex);
 
        return 0;
 }
@@ -1690,6 +1696,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
         * mlx5_ib_vma_close.
         */
        down_write(&owning_mm->mmap_sem);
+       mutex_lock(&context->vma_private_list_mutex);
        list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
                                 list) {
                vma = vma_private->vma;
@@ -1704,6 +1711,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
                list_del(&vma_private->list);
                kfree(vma_private);
        }
+       mutex_unlock(&context->vma_private_list_mutex);
        up_write(&owning_mm->mmap_sem);
        mmput(owning_mm);
        put_task_struct(owning_process);
@@ -3737,34 +3745,6 @@ free:
        return ret;
 }
 
-static int mlx5_ib_query_cong_counters(struct mlx5_ib_dev *dev,
-                                      struct mlx5_ib_port *port,
-                                      struct rdma_hw_stats *stats)
-{
-       int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
-       void *out;
-       int ret, i;
-       int offset = port->cnts.num_q_counters;
-
-       out = kvzalloc(outlen, GFP_KERNEL);
-       if (!out)
-               return -ENOMEM;
-
-       ret = mlx5_cmd_query_cong_counter(dev->mdev, false, out, outlen);
-       if (ret)
-               goto free;
-
-       for (i = 0; i < port->cnts.num_cong_counters; i++) {
-               stats->value[i + offset] =
-                       be64_to_cpup((__be64 *)(out +
-                                    port->cnts.offsets[i + offset]));
-       }
-
-free:
-       kvfree(out);
-       return ret;
-}
-
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
                                u8 port_num, int index)
@@ -3782,7 +3762,12 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
        num_counters = port->cnts.num_q_counters;
 
        if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-               ret = mlx5_ib_query_cong_counters(dev, port, stats);
+               ret = mlx5_lag_query_cong_counters(dev->mdev,
+                                                  stats->value +
+                                                  port->cnts.num_q_counters,
+                                                  port->cnts.num_cong_counters,
+                                                  port->cnts.offsets +
+                                                  port->cnts.num_q_counters);
                if (ret)
                        return ret;
                num_counters += port->cnts.num_cong_counters;
index 6dd8cac78de2c44854f1d5006dd058d2bc81e170..2c5f3533bbc9cbf08ce4fc816ecb86bc5ff04612 100644 (file)
@@ -115,6 +115,8 @@ enum {
 struct mlx5_ib_vma_private_data {
        struct list_head list;
        struct vm_area_struct *vma;
+       /* protect vma_private_list add/del */
+       struct mutex *vma_private_list_mutex;
 };
 
 struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
        /* Transport Domain number */
        u32                     tdn;
        struct list_head        vma_private_list;
+       /* protect vma_private_list add/del */
+       struct mutex            vma_private_list_mutex;
 
        unsigned long           upd_xlt_page;
        /* protect ODP/KSM */
index ee0ee1f9994b4fae933d8590b72ba138d29e176d..d109fe8290a70964d71e44ceb1408215f24d2058 100644 (file)
@@ -1637,6 +1637,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
        MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
        MLX5_SET(mkc, mkc, umr_en, 1);
 
+       mr->ibmr.device = pd->device;
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
        if (err)
                goto err_destroy_psv;
index 63bc2efc34eb57e37f364e5b73e5a157e6040e47..4f7bd3b6a3152937a8ec2fa4de8637e3d9391a9a 100644 (file)
@@ -94,7 +94,7 @@ struct pvrdma_cq {
        u32 cq_handle;
        bool is_kernel;
        atomic_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_id_table {
@@ -175,7 +175,7 @@ struct pvrdma_srq {
        u32 srq_handle;
        int npages;
        refcount_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_qp {
@@ -197,7 +197,7 @@ struct pvrdma_qp {
        bool is_kernel;
        struct mutex mutex; /* QP state mutex. */
        atomic_t refcnt;
-       wait_queue_head_t wait;
+       struct completion free;
 };
 
 struct pvrdma_dev {
index 3562c0c30492d07a7d1eddacb033ef769a1f9264..e529622cefad6a501492dc165458b738636842b3 100644 (file)
@@ -179,7 +179,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
                pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
 
        atomic_set(&cq->refcnt, 1);
-       init_waitqueue_head(&cq->wait);
+       init_completion(&cq->free);
        spin_lock_init(&cq->cq_lock);
 
        memset(cmd, 0, sizeof(*cmd));
@@ -230,8 +230,9 @@ err_cq:
 
 static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
 {
-       atomic_dec(&cq->refcnt);
-       wait_event(cq->wait, !atomic_read(&cq->refcnt));
+       if (atomic_dec_and_test(&cq->refcnt))
+               complete(&cq->free);
+       wait_for_completion(&cq->free);
 
        if (!cq->is_kernel)
                ib_umem_release(cq->umem);
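
The wait_event()-on-refcount scheme being replaced here can miss a wakeup; the completion-based form is the standard teardown idiom: the destroyer drops the creation reference, then sleeps until the final user signals. A generic sketch of the idiom (illustrative; struct obj and its helpers are hypothetical):

	struct obj {
		atomic_t refcnt;	/* set to 1 at creation */
		struct completion free;	/* init_completion() at creation */
	};

	static void obj_put(struct obj *o)
	{
		if (atomic_dec_and_test(&o->refcnt))
			complete(&o->free);
	}

	static void obj_destroy(struct obj *o)
	{
		obj_put(o);			/* drop creator's reference */
		wait_for_completion(&o->free);	/* last user signals here */
		/* now safe to tear down o's resources */
	}
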
index 1f4e18717a006da9cd3566318123f0fff1038df9..e92681878c93f4b0d4f29724700cc7ba474d7de4 100644 (file)
@@ -346,9 +346,8 @@ static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
                ibqp->event_handler(&e, ibqp->qp_context);
        }
        if (qp) {
-               atomic_dec(&qp->refcnt);
-               if (atomic_read(&qp->refcnt) == 0)
-                       wake_up(&qp->wait);
+               if (atomic_dec_and_test(&qp->refcnt))
+                       complete(&qp->free);
        }
 }
 
@@ -373,9 +372,8 @@ static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
                ibcq->event_handler(&e, ibcq->cq_context);
        }
        if (cq) {
-               atomic_dec(&cq->refcnt);
-               if (atomic_read(&cq->refcnt) == 0)
-                       wake_up(&cq->wait);
+               if (atomic_dec_and_test(&cq->refcnt))
+                       complete(&cq->free);
        }
 }
 
@@ -404,7 +402,7 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
        }
        if (srq) {
                if (refcount_dec_and_test(&srq->refcnt))
-                       wake_up(&srq->wait);
+                       complete(&srq->free);
        }
 }
 
@@ -539,9 +537,8 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
                if (cq && cq->ibcq.comp_handler)
                        cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                if (cq) {
-                       atomic_dec(&cq->refcnt);
-                       if (atomic_read(&cq->refcnt))
-                               wake_up(&cq->wait);
+                       if (atomic_dec_and_test(&cq->refcnt))
+                               complete(&cq->free);
                }
                pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
        }
index 10420a18d02f46dc07f1698387e11105ab76c004..4059308e1454a5bfda72a31d9931afaa50d39441 100644 (file)
@@ -246,7 +246,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
                spin_lock_init(&qp->rq.lock);
                mutex_init(&qp->mutex);
                atomic_set(&qp->refcnt, 1);
-               init_waitqueue_head(&qp->wait);
+               init_completion(&qp->free);
 
                qp->state = IB_QPS_RESET;
 
@@ -428,8 +428,16 @@ static void pvrdma_free_qp(struct pvrdma_qp *qp)
 
        pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
 
-       atomic_dec(&qp->refcnt);
-       wait_event(qp->wait, !atomic_read(&qp->refcnt));
+       if (atomic_dec_and_test(&qp->refcnt))
+               complete(&qp->free);
+       wait_for_completion(&qp->free);
+
+       if (!qp->is_kernel) {
+               if (qp->rumem)
+                       ib_umem_release(qp->rumem);
+               if (qp->sumem)
+                       ib_umem_release(qp->sumem);
+       }
 
        pvrdma_page_dir_cleanup(dev, &qp->pdir);
 
index 826ccb864596dc19879211f9cdeefee47e536d5c..5acebb1ef631ae0070d28917530345689a2654de 100644 (file)
@@ -149,7 +149,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
 
        spin_lock_init(&srq->lock);
        refcount_set(&srq->refcnt, 1);
-       init_waitqueue_head(&srq->wait);
+       init_completion(&srq->free);
 
        dev_dbg(&dev->pdev->dev,
                "create shared receive queue from user space\n");
@@ -236,8 +236,9 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
        dev->srq_tbl[srq->srq_handle] = NULL;
        spin_unlock_irqrestore(&dev->srq_tbl_lock, flags);
 
-       refcount_dec(&srq->refcnt);
-       wait_event(srq->wait, !refcount_read(&srq->refcnt));
+       if (refcount_dec_and_test(&srq->refcnt))
+               complete(&srq->free);
+       wait_for_completion(&srq->free);
 
        /* There is no support for kernel clients, so this is safe. */
        ib_umem_release(srq->umem);
index 3b96cdaf9a835d0faebad2b6727dde1132f109f1..e6151a29c412a36d7d9db66833ad659ab853656b 100644 (file)
@@ -1236,13 +1236,10 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
                ipoib_ib_dev_down(dev);
 
        if (level == IPOIB_FLUSH_HEAVY) {
-               rtnl_lock();
                if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                        ipoib_ib_dev_stop(dev);
 
-               result = ipoib_ib_dev_open(dev);
-               rtnl_unlock();
-               if (result)
+               if (ipoib_ib_dev_open(dev))
                        return;
 
                if (netif_queue_stopped(dev))
@@ -1282,7 +1279,9 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work)
        struct ipoib_dev_priv *priv =
                container_of(work, struct ipoib_dev_priv, flush_heavy);
 
+       rtnl_lock();
        __ipoib_ib_dev_flush(priv, IPOIB_FLUSH_HEAVY, 0);
+       rtnl_unlock();
 }
 
 void ipoib_ib_dev_cleanup(struct net_device *dev)
index 12b7f911f0e5b9b9fce4f034bde66db4fcc26e81..8880351df179382fa6d64b4b6e698c2fdb563e94 100644 (file)
@@ -902,8 +902,8 @@ static int path_rec_start(struct net_device *dev,
        return 0;
 }
 
-static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
-                          struct net_device *dev)
+static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
+                                         struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
        struct rdma_netdev *rn = netdev_priv(dev);
@@ -917,7 +917,15 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                spin_unlock_irqrestore(&priv->lock, flags);
                ++dev->stats.tx_dropped;
                dev_kfree_skb_any(skb);
-               return;
+               return NULL;
+       }
+
+       /* To avoid a race condition, make sure that the
+        * neigh is added only once.
+        */
+       if (unlikely(!list_empty(&neigh->list))) {
+               spin_unlock_irqrestore(&priv->lock, flags);
+               return neigh;
        }
 
        path = __path_find(dev, daddr + 4);
@@ -956,7 +964,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                        path->ah->last_send = rn->send(dev, skb, path->ah->ah,
                                                       IPOIB_QPN(daddr));
                        ipoib_neigh_put(neigh);
-                       return;
+                       return NULL;
                }
        } else {
                neigh->ah  = NULL;
@@ -973,7 +981,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_neigh_put(neigh);
-       return;
+       return NULL;
 
 err_path:
        ipoib_neigh_free(neigh);
@@ -983,6 +991,8 @@ err_drop:
 
        spin_unlock_irqrestore(&priv->lock, flags);
        ipoib_neigh_put(neigh);
+
+       return NULL;
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
@@ -1091,8 +1101,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
        case htons(ETH_P_TIPC):
                neigh = ipoib_neigh_get(dev, phdr->hwaddr);
                if (unlikely(!neigh)) {
-                       neigh_add_path(skb, phdr->hwaddr, dev);
-                       return NETDEV_TX_OK;
+                       neigh = neigh_add_path(skb, phdr->hwaddr, dev);
+                       if (likely(!neigh))
+                               return NETDEV_TX_OK;
                }
                break;
        case htons(ETH_P_ARP):
index 93e149efc1f5fc0382b61dcfc9f84d786d8b52ca..9b3f47ae201603e8bc4e1527a0b3546a0de69878 100644 (file)
@@ -816,7 +816,10 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                spin_lock_irqsave(&priv->lock, flags);
                if (!neigh) {
                        neigh = ipoib_neigh_alloc(daddr, dev);
-                       if (neigh) {
+                       /* Make sure that the neigh is added to the
+                        * mcast list only once.
+                        */
+                       if (neigh && list_empty(&neigh->list)) {
                                kref_get(&mcast->ah->ref);
                                neigh->ah       = mcast->ah;
                                list_add_tail(&neigh->list, &mcast->neigh_list);
index 8a1bd354b1cc1cb195d77be4d545a11c1bc93ac4..bfa576aa9f03c75727b9b61db67d47bfc8846797 100644 (file)
@@ -1013,8 +1013,7 @@ static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
                return -ENOMEM;
 
        attr->qp_state = IB_QPS_INIT;
-       attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
-           IB_ACCESS_REMOTE_WRITE;
+       attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;
 
@@ -2078,7 +2077,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                goto destroy_ib;
        }
 
-       guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
+       guid = (__be16 *)&param->primary_path->dgid.global.interface_id;
        snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
                 be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
                 be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
index 3d8ff09eba57696677b242d29e33fb1bac3f592d..c868a878c84f1dd5de44729af5531473699a54bb 100644 (file)
@@ -163,7 +163,7 @@ static unsigned int get_time_pit(void)
 #define GET_TIME(x)    do { x = (unsigned int)rdtsc(); } while (0)
 #define DELTA(x,y)     ((y)-(x))
 #define TIME_NAME      "TSC"
-#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_TILE)
+#elif defined(__alpha__) || defined(CONFIG_MN10300) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) || defined(CONFIG_TILE)
 #define GET_TIME(x)    do { x = get_cycles(); } while (0)
 #define DELTA(x,y)     ((y)-(x))
 #define TIME_NAME      "get_cycles"
index ae473123583bb22bedbb33426f11eb8f802ea368..3d51175c4d7207aac2b7d625a2c9af26ec558cf0 100644 (file)
@@ -1651,7 +1651,7 @@ ims_pcu_get_cdc_union_desc(struct usb_interface *intf)
                                return union_desc;
 
                        dev_err(&intf->dev,
-                               "Union descriptor to short (%d vs %zd\n)",
+                               "Union descriptor too short (%d vs %zd)\n",
                                union_desc->bLength, sizeof(*union_desc));
                        return NULL;
                }
index 6bf56bb5f8d97dd4b5835e54c7e7ca764db98434..d91f3b1c53755f44dc9e6a469359d0173dbbf264 100644 (file)
@@ -326,8 +326,6 @@ static int xenkbd_probe(struct xenbus_device *dev,
                                     0, width, 0, 0);
                input_set_abs_params(mtouch, ABS_MT_POSITION_Y,
                                     0, height, 0, 0);
-               input_set_abs_params(mtouch, ABS_MT_PRESSURE,
-                                    0, 255, 0, 0);
 
                ret = input_mt_init_slots(mtouch, num_cont, INPUT_MT_DIRECT);
                if (ret) {
index b84cd978fce2da18df706ee0a6ef72ab0a45cb5b..a4aaa748e987f7e4db918e290d20d356f9eec21b 100644 (file)
@@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd)
                case 5:
                        etd->hw_version = 3;
                        break;
-               case 6 ... 14:
+               case 6 ... 15:
                        etd->hw_version = 4;
                        break;
                default:
index e102d7764bc25e50649e34e229afda59552dfe8b..a458e5ec9e41eaee8a3e04c6643aca614c79a1c3 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/async.h>
 #include <linux/i2c.h>
@@ -1261,10 +1262,13 @@ static int elants_i2c_probe(struct i2c_client *client,
        }
 
        /*
-        * Systems using device tree should set up interrupt via DTS,
-        * the rest will use the default falling edge interrupts.
+        * Platform code (ACPI, DTS) should normally set up the interrupt
+        * for us, but if it did not, fall back to falling-edge triggering
+        * to stay compatible with older Chromebooks.
         */
-       irqflags = client->dev.of_node ? 0 : IRQF_TRIGGER_FALLING;
+       irqflags = irq_get_trigger_type(client->irq);
+       if (!irqflags)
+               irqflags = IRQF_TRIGGER_FALLING;
 
        error = devm_request_threaded_irq(&client->dev, client->irq,
                                          NULL, elants_i2c_irq,
index fc080a7c2e1fbb8c17275d5d7e1c77462015a3d3..f1cd4dd9a4a3459a55ce66c79d39405972201e9d 100644 (file)
@@ -10,8 +10,7 @@
 #include <linux/of.h>
 #include <linux/firmware.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
-#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/acpi.h>
 #include <linux/interrupt.h>
index 7d5eb004091d1d64f6956825e9dd14bf975a8b68..dc4b73833419aa925af573cca1c0980dfc436412 100644 (file)
@@ -616,7 +616,9 @@ retry:
                       address, flags);
                break;
        default:
-               printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type);
+               printk(KERN_ERR "UNKNOWN type=0x%02x event[0]=0x%08x "
+                      "event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n",
+                      type, event[0], event[1], event[2], event[3]);
        }
 
        memset(__evt, 0, 4 * sizeof(u32));
@@ -1815,7 +1817,8 @@ static bool dma_ops_domain(struct protection_domain *domain)
        return domain->flags & PD_DMA_OPS_MASK;
 }
 
-static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
+static void set_dte_entry(u16 devid, struct protection_domain *domain,
+                         bool ats, bool ppr)
 {
        u64 pte_root = 0;
        u64 flags = 0;
@@ -1832,6 +1835,13 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
        if (ats)
                flags |= DTE_FLAG_IOTLB;
 
+       if (ppr) {
+               struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+               if (iommu_feature(iommu, FEATURE_EPHSUP))
+                       pte_root |= 1ULL << DEV_ENTRY_PPR;
+       }
+
        if (domain->flags & PD_IOMMUV2_MASK) {
                u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
                u64 glx  = domain->glx;
@@ -1894,9 +1904,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
        domain->dev_cnt                 += 1;
 
        /* Update device table */
-       set_dte_entry(dev_data->devid, domain, ats);
+       set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
        if (alias != dev_data->devid)
-               set_dte_entry(alias, domain, ats);
+               set_dte_entry(alias, domain, ats, dev_data->iommu_v2);
 
        device_flush_dte(dev_data);
 }
@@ -2275,13 +2285,15 @@ static void update_device_table(struct protection_domain *domain)
        struct iommu_dev_data *dev_data;
 
        list_for_each_entry(dev_data, &domain->dev_list, list) {
-               set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled);
+               set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
+                             dev_data->iommu_v2);
 
                if (dev_data->devid == dev_data->alias)
                        continue;
 
                /* There is an alias, update device table entry for it */
-               set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled);
+               set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled,
+                             dev_data->iommu_v2);
        }
 }
 
@@ -4184,7 +4196,7 @@ static void amd_ir_update_irte(struct irq_data *irqd, struct amd_iommu *iommu,
                               struct irq_cfg *cfg);
 
 static int irq_remapping_activate(struct irq_domain *domain,
-                                 struct irq_data *irq_data, bool early)
+                                 struct irq_data *irq_data, bool reserve)
 {
        struct amd_ir_data *data = irq_data->chip_data;
        struct irq_2_irte *irte_info = &data->irq_2_irte;
index f6b24c7d8b70e335d94992fe54ccd4d72b062be2..6a877ebd058b1e87a22c9fb2fe3c10035d6123ef 100644 (file)
@@ -98,6 +98,7 @@
 #define FEATURE_HE             (1ULL<<8)
 #define FEATURE_PC             (1ULL<<9)
 #define FEATURE_GAM_VAPIC      (1ULL<<21)
+#define FEATURE_EPHSUP         (1ULL<<50)
 
 #define FEATURE_PASID_SHIFT    32
 #define FEATURE_PASID_MASK     (0x1fULL << FEATURE_PASID_SHIFT)
 /* macros and definitions for device table entries */
 #define DEV_ENTRY_VALID         0x00
 #define DEV_ENTRY_TRANSLATION   0x01
+#define DEV_ENTRY_PPR           0x34
 #define DEV_ENTRY_IR            0x3d
 #define DEV_ENTRY_IW            0x3e
 #define DEV_ENTRY_NO_PAGE_FAULT        0x62
index f122071688fd530b3fedd5e21ddbaf0ccd4bc203..3f2f1fc68b5240cea0b7d82bcbd46e1569d40665 100644 (file)
@@ -1698,13 +1698,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
        domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
        domain->geometry.aperture_end = (1UL << ias) - 1;
        domain->geometry.force_aperture = true;
-       smmu_domain->pgtbl_ops = pgtbl_ops;
 
        ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
-       if (ret < 0)
+       if (ret < 0) {
                free_io_pgtable_ops(pgtbl_ops);
+               return ret;
+       }
 
-       return ret;
+       smmu_domain->pgtbl_ops = pgtbl_ops;
+       return 0;
 }
 
 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1731,7 +1733,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 
 static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
-       int i;
+       int i, j;
        struct arm_smmu_master_data *master = fwspec->iommu_priv;
        struct arm_smmu_device *smmu = master->smmu;
 
@@ -1739,6 +1741,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
                u32 sid = fwspec->ids[i];
                __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
 
+               /* Bridged PCI devices may end up with duplicated IDs */
+               for (j = 0; j < i; j++)
+                       if (fwspec->ids[j] == sid)
+                               break;
+               if (j < i)
+                       continue;
+
                arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
        }
 }
@@ -2962,7 +2971,7 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL);
+IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3");
 
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
index 78d4c6b8f1bad57477f598a45430a7e519e7af39..69e7c60792a8e37130f75b447232d25369888cb8 100644 (file)
@@ -2211,12 +2211,12 @@ static struct platform_driver arm_smmu_driver = {
 };
 module_platform_driver(arm_smmu_driver);
 
-IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL);
-IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL);
-IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL);
-IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL);
-IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL);
-IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL);
+IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1");
+IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2");
+IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400");
+IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401");
+IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500");
+IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2");
 
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
index 79c45650f8de6b2a20b0d1a79ca9dff5a17dec4c..2138102ef611de074ea16291770470feeaf2d4ee 100644 (file)
@@ -1353,8 +1353,15 @@ static const struct iommu_ops exynos_iommu_ops = {
 
 static int __init exynos_iommu_init(void)
 {
+       struct device_node *np;
        int ret;
 
+       np = of_find_matching_node(NULL, sysmmu_of_match);
+       if (!np)
+               return 0;
+
+       of_node_put(np);
+
        lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table",
                                LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL);
        if (!lv2table_kmem_cache) {
@@ -1394,4 +1401,4 @@ err_reg_driver:
 }
 core_initcall(exynos_iommu_init);
 
-IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", NULL);
+IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu");
index 4a2de34895ec3177eb07082afe46a8921fa9f958..53227d6e911e91876d0115c9b5e675004af9ecf9 100644 (file)
@@ -64,7 +64,7 @@
 #define IOAPIC_RANGE_END       (0xfeefffff)
 #define IOVA_START_ADDR                (0x1000)
 
-#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
+#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
 
 #define MAX_AGAW_WIDTH 64
 #define MAX_AGAW_PFN_WIDTH     (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
@@ -1601,8 +1601,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
         * flush. However, device IOTLB doesn't need to be flushed in this case.
         */
        if (!cap_caching_mode(iommu->cap) || !map)
-               iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
-                                     addr, mask);
+               iommu_flush_dev_iotlb(domain, addr, mask);
 }
 
 static void iommu_flush_iova(struct iova_domain *iovad)
index ed1cf7c5a43ba33cc04ff32d1c88efcbb49bdbde..859b61ddd508b6f9dd457acde3da60d9a1a7cb22 100644 (file)
 #include <linux/interrupt.h>
 #include <asm/page.h>
 
+#define PASID_ENTRY_P          BIT_ULL(0)
+#define PASID_ENTRY_FLPM_5LP   BIT_ULL(9)
+#define PASID_ENTRY_SRE                BIT_ULL(11)
+
 static irqreturn_t prq_event_thread(int irq, void *d);
 
 struct pasid_entry {
@@ -41,6 +45,14 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
        struct page *pages;
        int order;
 
+       if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
+                       !cap_fl1gp_support(iommu->cap))
+               return -EINVAL;
+
+       if (cpu_feature_enabled(X86_FEATURE_LA57) &&
+                       !cap_5lp_support(iommu->cap))
+               return -EINVAL;
+
        /* Start at 2 because it's defined as 2^(1+PSS) */
        iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
 
@@ -129,6 +141,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
                pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
                       iommu->name);
                dmar_free_hwirq(irq);
+               iommu->pr_irq = 0;
                goto err;
        }
        dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
@@ -144,9 +157,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
        dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
        dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
 
-       free_irq(iommu->pr_irq, iommu);
-       dmar_free_hwirq(iommu->pr_irq);
-       iommu->pr_irq = 0;
+       if (iommu->pr_irq) {
+               free_irq(iommu->pr_irq, iommu);
+               dmar_free_hwirq(iommu->pr_irq);
+               iommu->pr_irq = 0;
+       }
 
        free_pages((unsigned long)iommu->prq, PRQ_ORDER);
        iommu->prq = NULL;
@@ -289,6 +304,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
        struct intel_svm_dev *sdev;
        struct intel_svm *svm = NULL;
        struct mm_struct *mm = NULL;
+       u64 pasid_entry_val;
        int pasid_max;
        int ret;
 
@@ -395,9 +411,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
                                kfree(sdev);
                                goto out;
                        }
-                       iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
+                       pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P;
                } else
-                       iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
+                       pasid_entry_val = (u64)__pa(init_mm.pgd) |
+                                         PASID_ENTRY_P | PASID_ENTRY_SRE;
+               if (cpu_feature_enabled(X86_FEATURE_LA57))
+                       pasid_entry_val |= PASID_ENTRY_FLPM_5LP;
+
+               iommu->pasid_table[svm->pasid].val = pasid_entry_val;
+
                wmb();
                /* In caching mode, we still have to flush with PASID 0 when
                 * a PASID table entry becomes present. Not entirely clear
index 76a193c7fcfc69b012d3e6e1f1dd246ab8d5acc8..66f69af2c2191f6a247a82acc227ed64b85c6bff 100644 (file)
@@ -1397,7 +1397,7 @@ static void intel_irq_remapping_free(struct irq_domain *domain,
 }
 
 static int intel_irq_remapping_activate(struct irq_domain *domain,
-                                       struct irq_data *irq_data, bool early)
+                                       struct irq_data *irq_data, bool reserve)
 {
        intel_ir_reconfigure_irte(irq_data, true);
        return 0;
index 3de5c0bcb5cc9f32b6fb615ce38f5729ae86d407..69fef991c651de26c60e5c3cb5f2e6be0c0d0fcb 100644 (file)
@@ -1303,6 +1303,9 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
        int ret;
 
        group = iommu_group_get(dev);
+       if (!group)
+               return -ENODEV;
+
        /*
         * Lock the group to make sure the device-count doesn't
         * change while we are attaching
@@ -1341,6 +1344,8 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
        struct iommu_group *group;
 
        group = iommu_group_get(dev);
+       if (!group)
+               return;
 
        mutex_lock(&group->mutex);
        if (iommu_group_device_count(group) != 1) {
index 8dce3a9de9d86e37b66dce131952aa19e0aadb2a..40ae6e87cb8802232b58d7b29f84d3f82930d7c2 100644 (file)
@@ -1108,18 +1108,8 @@ static void __exit ipmmu_exit(void)
 subsys_initcall(ipmmu_init);
 module_exit(ipmmu_exit);
 
-#ifdef CONFIG_IOMMU_DMA
-static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np)
-{
-       ipmmu_init();
-       return 0;
-}
-
-IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa",
-                ipmmu_vmsa_iommu_of_setup);
-IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795",
-                ipmmu_vmsa_iommu_of_setup);
-#endif
+IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa");
+IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795");
 
 MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
 MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
index 04f4d51ffacb1e50ae3f87f04fd3b23306f7b55b..0d3350463a3f48fe783e8fb6821e92ff1163f0b4 100644 (file)
@@ -823,6 +823,8 @@ static int msm_iommu_probe(struct platform_device *pdev)
                goto fail;
        }
 
+       bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
+
        pr_info("device mapped at %p, irq %d with %d ctx banks\n",
                iommu->base, iommu->irq, iommu->ncb);
 
@@ -875,19 +877,7 @@ static void __exit msm_iommu_driver_exit(void)
 subsys_initcall(msm_iommu_driver_init);
 module_exit(msm_iommu_driver_exit);
 
-static int __init msm_iommu_init(void)
-{
-       bus_set_iommu(&platform_bus_type, &msm_iommu_ops);
-       return 0;
-}
-
-static int __init msm_iommu_of_setup(struct device_node *np)
-{
-       msm_iommu_init();
-       return 0;
-}
-
-IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", msm_iommu_of_setup);
+IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu");
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Stepan Moskovchenko <stepanm@codeaurora.org>");
index 50947ebb6d1700e3f81acb5806c1b1a8b183c145..5c36a8b7656ae2b4f298dfa9793807085e9e0708 100644 (file)
@@ -231,19 +231,3 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
 
        return ops;
 }
-
-static int __init of_iommu_init(void)
-{
-       struct device_node *np;
-       const struct of_device_id *match, *matches = &__iommu_of_table;
-
-       for_each_matching_node_and_match(np, matches, &match) {
-               const of_iommu_init_fn init_fn = match->data;
-
-               if (init_fn && init_fn(np))
-                       pr_err("Failed to initialise IOMMU %pOF\n", np);
-       }
-
-       return 0;
-}
-postcore_initcall_sync(of_iommu_init);
index 505548aafeff2276c82c64de4702aae37ddfe15b..50217548c3b8e7afbb906a57b28705d2c1df7cb7 100644 (file)
@@ -274,8 +274,8 @@ void omap_iommu_debugfs_add(struct omap_iommu *obj)
        if (!obj->debug_dir)
                return;
 
-       d = debugfs_create_u8("nr_tlb_entries", 0400, obj->debug_dir,
-                             (u8 *)&obj->nr_tlb_entries);
+       d = debugfs_create_u32("nr_tlb_entries", 0400, obj->debug_dir,
+                              &obj->nr_tlb_entries);
        if (!d)
                return;
 
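
The omap-iommu debugfs fix matters because nr_tlb_entries is a 32-bit field: exposing it via debugfs_create_u8() with a (u8 *) cast publishes only byte 0 of the word, which is the wrong byte entirely on big-endian kernels. A sketch of the failure mode (the debugfs dir variable is assumed):

    u32 nr_tlb_entries = 32;

    /* Buggy: reads *(u8 *)&nr_tlb_entries, i.e. byte 0 of the u32.
     * On a big-endian kernel that byte is 0, not 32.
     */
    debugfs_create_u8("nr_tlb_entries", 0400, dir, (u8 *)&nr_tlb_entries);

    /* Fixed: the helper matches the variable's real width. */
    debugfs_create_u32("nr_tlb_entries", 0400, dir, &nr_tlb_entries);
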
index e07f02d00c688f0b604c71f6531d83dc43a94ee2..65b9c99707f84837aa0bb863ec211e08050ca005 100644 (file)
@@ -947,7 +947,7 @@ static void __exit qcom_iommu_exit(void)
 module_init(qcom_iommu_init);
 module_exit(qcom_iommu_exit);
 
-IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1", NULL);
+IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1");
 
 MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations");
 MODULE_LICENSE("GPL v2");
index 4039e64cd34211db8fac8ebc2f993c5e081e9c83..06f025fd5726f6b230d51c880e7b8accf9e8c738 100644 (file)
@@ -2303,7 +2303,7 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 }
 
 static int its_irq_domain_activate(struct irq_domain *domain,
-                                  struct irq_data *d, bool early)
+                                  struct irq_data *d, bool reserve)
 {
        struct its_device *its_dev = irq_data_get_irq_chip_data(d);
        u32 event = its_get_event_id(d);
@@ -2818,7 +2818,7 @@ static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq
 }
 
 static int its_vpe_irq_domain_activate(struct irq_domain *domain,
-                                      struct irq_data *d, bool early)
+                                      struct irq_data *d, bool reserve)
 {
        struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
        struct its_node *its;
index 06f29cf5018a151f7d35641d90b8d043aec85516..cee59fe1321c44f9e37c9b6e5bcfc1a57fb98f41 100644 (file)
@@ -342,6 +342,9 @@ static irqreturn_t intc_irqpin_shared_irq_handler(int irq, void *dev_id)
  */
 static struct lock_class_key intc_irqpin_irq_lock_class;
 
+/* And this is for the request mutex */
+static struct lock_class_key intc_irqpin_irq_request_class;
+
 static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
                                      irq_hw_number_t hw)
 {
@@ -352,7 +355,8 @@ static int intc_irqpin_irq_domain_map(struct irq_domain *h, unsigned int virq,
 
        intc_irqpin_dbg(&p->irq[hw], "map");
        irq_set_chip_data(virq, h->host_data);
-       irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class);
+       irq_set_lockdep_class(virq, &intc_irqpin_irq_lock_class,
+                             &intc_irqpin_irq_request_class);
        irq_set_chip_and_handler(virq, &p->irq_chip, handle_level_irq);
        return 0;
 }
index fd83c7f77a95d4998cff58be3d78ba467faddeab..ede4fa0ac2cceb736f69890a8823909a5d162f38 100644 (file)
@@ -188,6 +188,7 @@ void led_blink_set(struct led_classdev *led_cdev,
 {
        del_timer_sync(&led_cdev->blink_timer);
 
+       clear_bit(LED_BLINK_SW, &led_cdev->work_flags);
        clear_bit(LED_BLINK_ONESHOT, &led_cdev->work_flags);
        clear_bit(LED_BLINK_ONESHOT_STOP, &led_cdev->work_flags);
 
index 09cf3699e354415046af18c21eae3d98a445a53e..a307832d7e45fd90e046c7118d7fe505036d3cf6 100644 (file)
@@ -184,6 +184,7 @@ static struct irq_chip arizona_irq_chip = {
 };
 
 static struct lock_class_key arizona_irq_lock_class;
+static struct lock_class_key arizona_irq_request_class;
 
 static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
                              irq_hw_number_t hw)
@@ -191,7 +192,8 @@ static int arizona_irq_map(struct irq_domain *h, unsigned int virq,
        struct arizona *data = h->host_data;
 
        irq_set_chip_data(virq, data);
-       irq_set_lockdep_class(virq, &arizona_irq_lock_class);
+       irq_set_lockdep_class(virq, &arizona_irq_lock_class,
+               &arizona_irq_request_class);
        irq_set_chip_and_handler(virq, &arizona_irq_chip, handle_simple_irq);
        irq_set_nested_thread(virq, 1);
        irq_set_noprobe(virq);
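
This arizona hunk and the intc-irqpin hunk above adapt to the same core change: irq_set_lockdep_class() now takes a second class key so lockdep can tell the irq_desc spinlock apart from the descriptor's request mutex. The resulting pattern, sketched with hypothetical names:

    /* One static key per lock class the descriptor carries. */
    static struct lock_class_key demo_irq_lock_class;
    static struct lock_class_key demo_irq_request_class;

    static int demo_irq_map(struct irq_domain *h, unsigned int virq,
                            irq_hw_number_t hw)
    {
            irq_set_chip_data(virq, h->host_data);
            irq_set_lockdep_class(virq, &demo_irq_lock_class,
                                  &demo_irq_request_class);
            return 0;
    }
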
index c9714072e22465d4b23d8101038f782b084b2dca..59c82cdcf48d8a508613dbc7b1c98654285de28f 100644 (file)
@@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        u8 *ptr;
        u8 *rx_buf;
        u8 sum;
+       u8 rx_byte;
        int ret = 0, final_ret;
 
        len = cros_ec_prepare_tx(ec_dev, ec_msg);
@@ -421,25 +422,22 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_packet(ec_dev,
-                                       ec_msg->insize + sizeof(*response));
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_packet(ec_dev,
+                               ec_msg->insize + sizeof(*response));
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        int i, len;
        u8 *ptr;
        u8 *rx_buf;
+       u8 rx_byte;
        int sum;
        int ret = 0, final_ret;
 
@@ -544,25 +543,22 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
        if (!ret) {
                /* Verify that EC can process command */
                for (i = 0; i < len; i++) {
-                       switch (rx_buf[i]) {
-                       case EC_SPI_PAST_END:
-                       case EC_SPI_RX_BAD_DATA:
-                       case EC_SPI_NOT_READY:
-                               ret = -EAGAIN;
-                               ec_msg->result = EC_RES_IN_PROGRESS;
-                       default:
+                       rx_byte = rx_buf[i];
+                       if (rx_byte == EC_SPI_PAST_END  ||
+                           rx_byte == EC_SPI_RX_BAD_DATA ||
+                           rx_byte == EC_SPI_NOT_READY) {
+                               ret = -EREMOTEIO;
                                break;
                        }
-                       if (ret)
-                               break;
                }
-               if (!ret)
-                       ret = cros_ec_spi_receive_response(ec_dev,
-                                       ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
-       } else {
-               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
        }
 
+       if (!ret)
+               ret = cros_ec_spi_receive_response(ec_dev,
+                               ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
+       else
+               dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
+
        final_ret = terminate_request(ec_dev);
 
        spi_bus_unlock(ec_spi->spi->master);
@@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
                           sizeof(struct ec_response_get_protocol_info);
        ec_dev->dout_size = sizeof(struct ec_host_request);
 
+       ec_spi->last_transfer_ns = ktime_get_ns();
 
        err = cros_ec_register(ec_dev);
        if (err) {
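
The two cros_ec_spi hunks replace a switch statement whose cases fell through into default with an explicit byte test, and turn the soft, retryable -EAGAIN into a hard -EREMOTEIO. The check reduces to roughly this self-contained sketch; the sentinel byte values here are assumptions for illustration, not taken from this diff:

    #include <errno.h>

    /* Assumed status bytes the EC drives while it cannot take a command. */
    #define EC_SPI_PAST_END    0xed
    #define EC_SPI_RX_BAD_DATA 0xfb
    #define EC_SPI_NOT_READY   0xfc

    static int check_rx_flags(const unsigned char *rx, int len)
    {
            for (int i = 0; i < len; i++) {
                    if (rx[i] == EC_SPI_PAST_END ||
                        rx[i] == EC_SPI_RX_BAD_DATA ||
                        rx[i] == EC_SPI_NOT_READY)
                            return -EREMOTEIO; /* protocol error, no retry */
            }
            return 0;
    }
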
index 590fb9aad77d9a690d6948d61715f04af0a43cd9..c3ed885c155cf3c85676133644a11666dfa3db01 100644 (file)
@@ -1543,6 +1543,9 @@ static void rtsx_pci_shutdown(struct pci_dev *pcidev)
        rtsx_pci_power_off(pcr, HOST_ENTER_S1);
 
        pci_disable_device(pcidev);
+       free_irq(pcr->irq, (void *)pcr);
+       if (pcr->msi_en)
+               pci_disable_msi(pcr->pci);
 }
 
 #else /* CONFIG_PM */
index da16bf45fab43ee9a946beef340f4cd2a224156e..dc94ffc6321a84dd25ce08d0f1a9374d40d4cead 100644 (file)
@@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
 EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
 
 static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
-                             struct device_node *node)
+                             struct device_node *parent)
 {
+       struct device_node *node;
+
        if (pdata && pdata->codec)
                return true;
 
-       if (of_find_node_by_name(node, "codec"))
+       node = of_get_child_by_name(parent, "codec");
+       if (node) {
+               of_node_put(node);
                return true;
+       }
 
        return false;
 }
index d66502d36ba0b3202d1c15c08540fa8aade42a32..dd19f17a1b637543965dd94e64d0d44b9178f64c 100644 (file)
@@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
 };
 
 
-static bool twl6040_has_vibra(struct device_node *node)
+static bool twl6040_has_vibra(struct device_node *parent)
 {
-#ifdef CONFIG_OF
-       if (of_find_node_by_name(node, "vibra"))
+       struct device_node *node;
+
+       node = of_get_child_by_name(parent, "vibra");
+       if (node) {
+               of_node_put(node);
                return true;
-#endif
+       }
+
        return false;
 }
 
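
The twl4030 and twl6040 hunks fix the same misuse: of_find_node_by_name() searches the whole tree starting after its first argument (and drops a reference on it), so it can match a "codec" or "vibra" node belonging to an unrelated device. The correct child-only probe, as a sketch:

    static bool has_child(struct device_node *parent, const char *name)
    {
            /* of_get_child_by_name() checks direct children only and
             * returns the node with its refcount raised.
             */
            struct device_node *node = of_get_child_by_name(parent, name);

            if (!node)
                    return false;

            of_node_put(node);      /* balance the reference before returning */
            return true;
    }
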
index fcf7235d5742ae443bdc629d293983db5b60312d..157e1d9e7725a050f64c32ebed135b46ae0ee58d 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/clk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
@@ -667,3 +668,5 @@ int renesas_sdhi_remove(struct platform_device *pdev)
        return 0;
 }
 EXPORT_SYMBOL_GPL(renesas_sdhi_remove);
+
+MODULE_LICENSE("GPL v2");
index f7f157a62a4a7dfa7155b5fe9933ead9f870ad1b..555c7f133eb8a5246ff3534d1899927463e99dc2 100644 (file)
@@ -1424,7 +1424,9 @@ static const struct file_operations s3cmci_fops_state = {
 struct s3cmci_reg {
        unsigned short  addr;
        unsigned char   *name;
-} debug_regs[] = {
+};
+
+static const struct s3cmci_reg debug_regs[] = {
        DBG_REG(CON),
        DBG_REG(PRE),
        DBG_REG(CMDARG),
@@ -1446,7 +1448,7 @@ struct s3cmci_reg {
 static int s3cmci_regs_show(struct seq_file *seq, void *v)
 {
        struct s3cmci_host *host = seq->private;
-       struct s3cmci_reg *rptr = debug_regs;
+       const struct s3cmci_reg *rptr = debug_regs;
 
        for (; rptr->name; rptr++)
                seq_printf(seq, "SDI%s\t=0x%08x\n", rptr->name,
index f80e911b8843819db8dcd1956c76ce2bf60b5ab8..73b6055774474e322b07cda4144c48b5b235a55c 100644 (file)
@@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
        if (!ops->oobbuf)
                ops->ooblen = 0;
 
-       if (offs < 0 || offs + ops->len >= mtd->size)
+       if (offs < 0 || offs + ops->len > mtd->size)
                return -EINVAL;
 
        if (ops->ooblen) {
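
The one-character mtd change fixes an off-by-one at the end of the device: an access may finish exactly at mtd->size. With size 10, offs 8 and len 2, the access covers bytes 8..9 and is valid, yet the old ">=" comparison rejected it; only a sum strictly greater than the size is out of range:

    /* After the fix: an access ending exactly at mtd->size passes. */
    if (offs < 0 || offs + ops->len > mtd->size)
            return -EINVAL;         /* e.g. offs 8, len 3, size 10: 11 > 10 */
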
index e0eb51d8c0129937b35157ccdc107e5ef54c038a..dd56a671ea4285af0f5079bc652ecf4a32410272 100644 (file)
@@ -1763,7 +1763,7 @@ try_dmaread:
                        err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
                                                              addr);
                        /* erased page bitflips corrected */
-                       if (err > 0)
+                       if (err >= 0)
                                return err;
                }
 
index 484f7fbc3f7d2d11cd66fc3416e64ab38d47f852..a8bde6665c24f7e20e6103959ceee16c5d3ec5c8 100644 (file)
@@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
                goto out_ce;
        }
 
-       gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
-       if (IS_ERR(gpiomtd->nwp)) {
-               ret = PTR_ERR(gpiomtd->nwp);
+       gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
+       if (IS_ERR(gpiomtd->ale)) {
+               ret = PTR_ERR(gpiomtd->ale);
                goto out_ce;
        }
 
index 50f8d4a1b9832326070045d0c294d22393001fbd..d4d824ef64e9fb395af3bc549daae72b96731e16 100644 (file)
@@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                return ret;
        }
 
-       /* handle the block mark swapping */
-       block_mark_swapping(this, payload_virt, auxiliary_virt);
-
        /* Loop over status bytes, accumulating ECC status. */
        status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
 
@@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
                max_bitflips = max_t(unsigned int, max_bitflips, *status);
        }
 
+       /* handle the block mark swapping */
+       block_mark_swapping(this, buf, auxiliary_virt);
+
        if (oob_required) {
                /*
                 * It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
index 90b9a9ccbe60e3fad44855705bf2dc4623cd5347..9285f60e57836ae056349751b75b9437596bd131 100644 (file)
@@ -963,6 +963,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)
 
        switch (command) {
        case NAND_CMD_READ0:
+       case NAND_CMD_READOOB:
        case NAND_CMD_PAGEPROG:
                info->use_ecc = 1;
                break;
index 2260063b0ea83be7f24e5d780af8bd808c23b427..6e5cf9d9cd9927f4264e3a01394e2b84d80d7d3b 100644 (file)
@@ -413,6 +413,7 @@ static int of_dev_node_match(struct device *dev, const void *data)
        return dev->of_node == data;
 }
 
+/* Note this function returns a reference to the mux_chip dev. */
 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 {
        struct device *dev;
@@ -466,6 +467,7 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
            (!args.args_count && (mux_chip->controllers > 1))) {
                dev_err(dev, "%pOF: wrong #mux-control-cells for %pOF\n",
                        np, args.np);
+               put_device(&mux_chip->dev);
                return ERR_PTR(-EINVAL);
        }
 
@@ -476,10 +478,10 @@ struct mux_control *mux_control_get(struct device *dev, const char *mux_name)
        if (controller >= mux_chip->controllers) {
                dev_err(dev, "%pOF: bad mux controller %u specified in %pOF\n",
                        np, controller, args.np);
+               put_device(&mux_chip->dev);
                return ERR_PTR(-EINVAL);
        }
 
-       get_device(&mux_chip->dev);
        return &mux_chip->mux[controller];
 }
 EXPORT_SYMBOL_GPL(mux_control_get);
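
The mux hunks rebalance the chip reference: per the new comment, of_find_mux_chip_by_node() already returns with a reference held, so the extra get_device() goes away and every error return after the lookup has to drop that reference. Sketched control flow (the error predicates are placeholders):

    chip = of_find_mux_chip_by_node(np);     /* ref held on success */
    if (wrong_cell_count || controller >= chip->controllers) {
            put_device(&chip->dev);          /* each error path drops it */
            return ERR_PTR(-EINVAL);
    }
    return &chip->mux[controller];           /* success keeps the ref for
                                                the eventual put */
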
index 0626dcfd1f3d83ceaad91968cffd65370189cfac..760d2c07e3a2c1edd15a2f70d20d1272adb3fd8a 100644 (file)
@@ -526,7 +526,7 @@ static int flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
                data = be32_to_cpup((__be32 *)&cf->data[0]);
                flexcan_write(data, &priv->tx_mb->data[0]);
        }
-       if (cf->can_dlc > 3) {
+       if (cf->can_dlc > 4) {
                data = be32_to_cpup((__be32 *)&cf->data[4]);
                flexcan_write(data, &priv->tx_mb->data[1]);
        }
index b00358297424604634489a9b106bf313c06b7fc9..12ff0020ecd60904b65b89a633cd2a60bf880ed2 100644 (file)
@@ -395,6 +395,7 @@ static void ems_usb_rx_err(struct ems_usb *dev, struct ems_cpc_msg *msg)
 
                if (dev->can.state == CAN_STATE_ERROR_WARNING ||
                    dev->can.state == CAN_STATE_ERROR_PASSIVE) {
+                       cf->can_id |= CAN_ERR_CRTL;
                        cf->data[1] = (txerr > rxerr) ?
                            CAN_ERR_CRTL_TX_PASSIVE : CAN_ERR_CRTL_RX_PASSIVE;
                }
index 68ac3e88a8cecbe5b4a58da8491756ad5c26039a..8bf80ad9dc44cf4622811bc1fab31404c50cfb95 100644 (file)
@@ -449,7 +449,7 @@ static int gs_usb_set_bittiming(struct net_device *netdev)
                dev_err(netdev->dev.parent, "Couldn't set bittimings (err=%d)",
                        rc);
 
-       return rc;
+       return (rc > 0) ? 0 : rc;
 }
 
 static void gs_usb_xmit_callback(struct urb *urb)
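
Context for the gs_usb one-liner: usb_control_msg() returns the number of bytes transferred on success, so a positive rc is success, not a failure, and must not leak out of a callback whose contract is zero-or-negative:

    /* usb_control_msg() convention: rc > 0 means bytes transferred, OK. */
    return (rc > 0) ? 0 : rc;
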
index 8404e8852a0f96df80d7cb3d1059e64890405949..b4c4a2c764378e17131234f6b9481d76be2db786 100644 (file)
@@ -194,7 +194,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
                tbp = peer_tb;
        }
 
-       if (tbp[IFLA_IFNAME]) {
+       if (ifmp && tbp[IFLA_IFNAME]) {
                nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
                name_assign_type = NET_NAME_USER;
        } else {
index f5a8dd96fd75f273f6a31647087bd0c5e0b4fd47..4498ab897d94b9ea86f5fb83fd5eda98bd88e263 100644 (file)
@@ -1500,10 +1500,13 @@ static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds,
 {
        struct b53_device *dev = ds->priv;
 
-       /* Older models support a different tag format that we do not
-        * support in net/dsa/tag_brcm.c yet.
+       /* Older models (5325, 5365) support a different tag format that we do
+        * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed
+        * mode to be turned on, which means we need to specifically manage ARL
+        * misses on multicast addresses (TBD).
         */
-       if (is5325(dev) || is5365(dev) || !b53_can_enable_brcm_tags(ds, port))
+       if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) ||
+           !b53_can_enable_brcm_tags(ds, port))
                return DSA_TAG_PROTO_NONE;
 
        /* Broadcom BCM58xx chips have a flow accelerator on Port 8
index f4e13a7014bde0a1cd4f60b4b60a5a1b0459d09e..36c8950dbd2d80699f396f217f0f438479f68355 100644 (file)
@@ -602,7 +602,7 @@ struct vortex_private {
        struct sk_buff* rx_skbuff[RX_RING_SIZE];
        struct sk_buff* tx_skbuff[TX_RING_SIZE];
        unsigned int cur_rx, cur_tx;            /* The next free ring entry */
-       unsigned int dirty_rx, dirty_tx;        /* The ring entries to be free()ed. */
+       unsigned int dirty_tx;  /* The ring entries to be free()ed. */
        struct vortex_extra_stats xstats;       /* NIC-specific extra stats */
        struct sk_buff *tx_skb;                         /* Packet being eaten by bus master ctrl.  */
        dma_addr_t tx_skb_dma;                          /* Allocated DMA address for bus master ctrl DMA.   */
@@ -618,7 +618,6 @@ struct vortex_private {
 
        /* The remainder are related to chip state, mostly media selection. */
        struct timer_list timer;                        /* Media selection timer. */
-       struct timer_list rx_oom_timer;         /* Rx skb allocation retry timer */
        int options;                                            /* User-settable misc. driver options. */
        unsigned int media_override:4,          /* Passed-in media type. */
                default_media:4,                                /* Read from the EEPROM/Wn3_Config. */
@@ -760,7 +759,6 @@ static void mdio_sync(struct vortex_private *vp, int bits);
 static int mdio_read(struct net_device *dev, int phy_id, int location);
 static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
 static void vortex_timer(struct timer_list *t);
-static void rx_oom_timer(struct timer_list *t);
 static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
                                     struct net_device *dev);
 static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
@@ -1601,7 +1599,6 @@ vortex_up(struct net_device *dev)
 
        timer_setup(&vp->timer, vortex_timer, 0);
        mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
-       timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
 
        if (vortex_debug > 1)
                pr_debug("%s: Initial media type %s.\n",
@@ -1676,7 +1673,7 @@ vortex_up(struct net_device *dev)
        window_write16(vp, 0x0040, 4, Wn4_NetDiag);
 
        if (vp->full_bus_master_rx) { /* Boomerang bus master. */
-               vp->cur_rx = vp->dirty_rx = 0;
+               vp->cur_rx = 0;
                /* Initialize the RxEarly register as recommended. */
                iowrite16(SetRxThreshold + (1536>>2), ioaddr + EL3_CMD);
                iowrite32(0x0020, ioaddr + PktStatus);
@@ -1729,6 +1726,7 @@ vortex_open(struct net_device *dev)
        struct vortex_private *vp = netdev_priv(dev);
        int i;
        int retval;
+       dma_addr_t dma;
 
        /* Use the now-standard shared IRQ implementation. */
        if ((retval = request_irq(dev->irq, vp->full_bus_master_rx ?
@@ -1753,7 +1751,11 @@ vortex_open(struct net_device *dev)
                                break;                  /* Bad news!  */
 
                        skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */
-                       vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
+                       dma = pci_map_single(VORTEX_PCI(vp), skb->data,
+                                            PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                       if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma))
+                               break;
+                       vp->rx_ring[i].addr = cpu_to_le32(dma);
                }
                if (i != RX_RING_SIZE) {
                        pr_emerg("%s: no memory for rx ring\n", dev->name);
@@ -2067,6 +2069,12 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
                int len = (skb->len + 3) & ~3;
                vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
                                                PCI_DMA_TODEVICE);
+               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) {
+                       dev_kfree_skb_any(skb);
+                       dev->stats.tx_dropped++;
+                       return NETDEV_TX_OK;
+               }
+
                spin_lock_irq(&vp->window_lock);
                window_set(vp, 7);
                iowrite32(vp->tx_skb_dma, ioaddr + Wn7_MasterAddr);
@@ -2593,7 +2601,7 @@ boomerang_rx(struct net_device *dev)
        int entry = vp->cur_rx % RX_RING_SIZE;
        void __iomem *ioaddr = vp->ioaddr;
        int rx_status;
-       int rx_work_limit = vp->dirty_rx + RX_RING_SIZE - vp->cur_rx;
+       int rx_work_limit = RX_RING_SIZE;
 
        if (vortex_debug > 5)
                pr_debug("boomerang_rx(): status %4.4x\n", ioread16(ioaddr+EL3_STATUS));
@@ -2614,7 +2622,8 @@ boomerang_rx(struct net_device *dev)
                } else {
                        /* The packet length: up to 4.5K!. */
                        int pkt_len = rx_status & 0x1fff;
-                       struct sk_buff *skb;
+                       struct sk_buff *skb, *newskb;
+                       dma_addr_t newdma;
                        dma_addr_t dma = le32_to_cpu(vp->rx_ring[entry].addr);
 
                        if (vortex_debug > 4)
@@ -2633,9 +2642,27 @@ boomerang_rx(struct net_device *dev)
                                pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
                                vp->rx_copy++;
                        } else {
+                               /* Pre-allocate the replacement skb.  If it or its
+                                * mapping fails, then recycle the buffer that's already
+                                * in place.
+                                */
+                               newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
+                               if (!newskb) {
+                                       dev->stats.rx_dropped++;
+                                       goto clear_complete;
+                               }
+                               newdma = pci_map_single(VORTEX_PCI(vp), newskb->data,
+                                                       PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
+                               if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) {
+                                       dev->stats.rx_dropped++;
+                                       consume_skb(newskb);
+                                       goto clear_complete;
+                               }
+
                                /* Pass up the skbuff already on the Rx ring. */
                                skb = vp->rx_skbuff[entry];
-                               vp->rx_skbuff[entry] = NULL;
+                               vp->rx_skbuff[entry] = newskb;
+                               vp->rx_ring[entry].addr = cpu_to_le32(newdma);
                                skb_put(skb, pkt_len);
                                pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
                                vp->rx_nocopy++;
@@ -2653,55 +2680,15 @@ boomerang_rx(struct net_device *dev)
                        netif_rx(skb);
                        dev->stats.rx_packets++;
                }
-               entry = (++vp->cur_rx) % RX_RING_SIZE;
-       }
-       /* Refill the Rx ring buffers. */
-       for (; vp->cur_rx - vp->dirty_rx > 0; vp->dirty_rx++) {
-               struct sk_buff *skb;
-               entry = vp->dirty_rx % RX_RING_SIZE;
-               if (vp->rx_skbuff[entry] == NULL) {
-                       skb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
-                       if (skb == NULL) {
-                               static unsigned long last_jif;
-                               if (time_after(jiffies, last_jif + 10 * HZ)) {
-                                       pr_warn("%s: memory shortage\n",
-                                               dev->name);
-                                       last_jif = jiffies;
-                               }
-                               if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)
-                                       mod_timer(&vp->rx_oom_timer, RUN_AT(HZ * 1));
-                               break;                  /* Bad news!  */
-                       }
 
-                       vp->rx_ring[entry].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
-                       vp->rx_skbuff[entry] = skb;
-               }
+clear_complete:
                vp->rx_ring[entry].status = 0;  /* Clear complete bit. */
                iowrite16(UpUnstall, ioaddr + EL3_CMD);
+               entry = (++vp->cur_rx) % RX_RING_SIZE;
        }
        return 0;
 }
 
-/*
- * If we've hit a total OOM refilling the Rx ring we poll once a second
- * for some memory.  Otherwise there is no way to restart the rx process.
- */
-static void
-rx_oom_timer(struct timer_list *t)
-{
-       struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
-       struct net_device *dev = vp->mii.dev;
-
-       spin_lock_irq(&vp->lock);
-       if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE)        /* This test is redundant, but makes me feel good */
-               boomerang_rx(dev);
-       if (vortex_debug > 1) {
-               pr_debug("%s: rx_oom_timer %s\n", dev->name,
-                       ((vp->cur_rx - vp->dirty_rx) != RX_RING_SIZE) ? "succeeded" : "retrying");
-       }
-       spin_unlock_irq(&vp->lock);
-}
-
 static void
 vortex_down(struct net_device *dev, int final_down)
 {
@@ -2711,7 +2698,6 @@ vortex_down(struct net_device *dev, int final_down)
        netdev_reset_queue(dev);
        netif_stop_queue(dev);
 
-       del_timer_sync(&vp->rx_oom_timer);
        del_timer_sync(&vp->timer);
 
        /* Turn off statistics ASAP.  We update dev->stats below. */
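
The 3c59x rework swaps the refill-later-plus-OOM-timer scheme for the standard "replace before you hand up" receive pattern: a ring slot is surrendered to the stack only once its substitute buffer is allocated and DMA-mapped, so the ring can never develop holes and the one-second recovery timer becomes unnecessary. The skeleton, condensed from the hunk:

    newskb = netdev_alloc_skb_ip_align(dev, PKT_BUF_SZ);
    if (!newskb)
            goto recycle;                    /* drop frame, keep old buffer */

    newdma = pci_map_single(pdev, newskb->data, PKT_BUF_SZ,
                            PCI_DMA_FROMDEVICE);
    if (dma_mapping_error(&pdev->dev, newdma)) {
            consume_skb(newskb);
            goto recycle;                    /* likewise */
    }

    /* hand the old skb to the stack, install newskb/newdma in the slot */
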
index 97c5a89a9cf7a4f1a65dbef70d889efd7d661c20..fbe21a817bd8a2c9ca01a2be6e8c6948b704ef87 100644 (file)
@@ -75,6 +75,9 @@ static struct workqueue_struct *ena_wq;
 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
 
 static int ena_rss_init_default(struct ena_adapter *adapter);
+static void check_for_admin_com_state(struct ena_adapter *adapter);
+static void ena_destroy_device(struct ena_adapter *adapter);
+static int ena_restore_device(struct ena_adapter *adapter);
 
 static void ena_tx_timeout(struct net_device *dev)
 {
@@ -1565,7 +1568,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
 
 static int ena_up_complete(struct ena_adapter *adapter)
 {
-       int rc, i;
+       int rc;
 
        rc = ena_rss_configure(adapter);
        if (rc)
@@ -1584,17 +1587,6 @@ static int ena_up_complete(struct ena_adapter *adapter)
 
        ena_napi_enable_all(adapter);
 
-       /* Enable completion queues interrupt */
-       for (i = 0; i < adapter->num_queues; i++)
-               ena_unmask_interrupt(&adapter->tx_ring[i],
-                                    &adapter->rx_ring[i]);
-
-       /* schedule napi in case we had pending packets
-        * from the last time we disable napi
-        */
-       for (i = 0; i < adapter->num_queues; i++)
-               napi_schedule(&adapter->ena_napi[i].napi);
-
        return 0;
 }
 
@@ -1731,7 +1723,7 @@ create_err:
 
 static int ena_up(struct ena_adapter *adapter)
 {
-       int rc;
+       int rc, i;
 
        netdev_dbg(adapter->netdev, "%s\n", __func__);
 
@@ -1774,6 +1766,17 @@ static int ena_up(struct ena_adapter *adapter)
 
        set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
 
+       /* Enable completion queues interrupt */
+       for (i = 0; i < adapter->num_queues; i++)
+               ena_unmask_interrupt(&adapter->tx_ring[i],
+                                    &adapter->rx_ring[i]);
+
+       /* schedule napi in case we had pending packets
+        * from the last time we disabled napi
+        */
+       for (i = 0; i < adapter->num_queues; i++)
+               napi_schedule(&adapter->ena_napi[i].napi);
+
        return rc;
 
 err_up:
@@ -1884,6 +1887,17 @@ static int ena_close(struct net_device *netdev)
        if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
                ena_down(adapter);
 
+       /* Check device status and issue a reset if needed */
+       check_for_admin_com_state(adapter);
+       if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+               netif_err(adapter, ifdown, adapter->netdev,
+                         "Destroy failure, restarting device\n");
+               ena_dump_stats_to_dmesg(adapter);
+               /* rtnl lock already obtained in dev_ioctl() layer */
+               ena_destroy_device(adapter);
+               ena_restore_device(adapter);
+       }
+
        return 0;
 }
 
@@ -2544,11 +2558,12 @@ static void ena_destroy_device(struct ena_adapter *adapter)
 
        ena_com_set_admin_running_state(ena_dev, false);
 
-       ena_close(netdev);
+       if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+               ena_down(adapter);
 
        /* Before releasing the ENA resources, a device reset is required.
         * (to prevent the device from accessing them).
-        * In case the reset flag is set and the device is up, ena_close
+        * In case the reset flag is set and the device is up, ena_down()
         * already perform the reset, so it can be skipped.
         */
        if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
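
The ena_up()/ena_up_complete() shuffle changes ordering rather than behavior: interrupts are unmasked and NAPI kicked only after ENA_FLAG_DEV_UP is set, so a completion or reset path that fires immediately observes a consistent "up" state. Condensed:

    set_bit(ENA_FLAG_DEV_UP, &adapter->flags);

    /* Only now is it safe to let completions and NAPI run. */
    for (i = 0; i < adapter->num_queues; i++)
            ena_unmask_interrupt(&adapter->tx_ring[i], &adapter->rx_ring[i]);
    for (i = 0; i < adapter->num_queues; i++)
            napi_schedule(&adapter->ena_napi[i].napi);
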
index 3c63b16d485f4bb3a7587e9e6d36dd0e121668d2..d9efbc8d783b84b128379e0ce58a43b005a8ab58 100644 (file)
@@ -159,6 +159,8 @@ struct arc_emac_priv {
        unsigned int link;
        unsigned int duplex;
        unsigned int speed;
+
+       unsigned int rx_missed_errors;
 };
 
 /**
index 3241af1ce7182824c09ee3ad774f122565f6c940..bd277b0dc615118a58b81dfba5b040e26fa667ba 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "emac.h"
 
+static void arc_emac_restart(struct net_device *ndev);
+
 /**
  * arc_emac_tx_avail - Return the number of available slots in the tx ring.
  * @priv: Pointer to ARC EMAC private data structure.
@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
                        continue;
                }
 
-               pktlen = info & LEN_MASK;
-               stats->rx_packets++;
-               stats->rx_bytes += pktlen;
-               skb = rx_buff->skb;
-               skb_put(skb, pktlen);
-               skb->dev = ndev;
-               skb->protocol = eth_type_trans(skb, ndev);
-
-               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
-                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
-
-               /* Prepare the BD for next cycle */
-               rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
-                                                        EMAC_BUFFER_SIZE);
-               if (unlikely(!rx_buff->skb)) {
+               /* Prepare the BD for the next cycle. Call netif_receive_skb()
+                * only if a new skb was allocated and mapped, to avoid holes
+                * in the RX FIFO.
+                */
+               skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
+               if (unlikely(!skb)) {
+                       if (net_ratelimit())
+                               netdev_err(ndev, "cannot allocate skb\n");
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
-                       /* Because receive_skb is below, increment rx_dropped */
                        stats->rx_dropped++;
                        continue;
                }
 
-               /* receive_skb only if new skb was allocated to avoid holes */
-               netif_receive_skb(skb);
-
-               addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
+               addr = dma_map_single(&ndev->dev, (void *)skb->data,
                                      EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
                if (dma_mapping_error(&ndev->dev, addr)) {
                        if (net_ratelimit())
-                               netdev_err(ndev, "cannot dma map\n");
-                       dev_kfree_skb(rx_buff->skb);
+                               netdev_err(ndev, "cannot map dma buffer\n");
+                       dev_kfree_skb(skb);
+                       /* Return ownership to EMAC */
+                       rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
                        stats->rx_errors++;
+                       stats->rx_dropped++;
                        continue;
                }
+
+               /* unmap the previously mapped skb */
+               dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
+                                dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
+
+               pktlen = info & LEN_MASK;
+               stats->rx_packets++;
+               stats->rx_bytes += pktlen;
+               skb_put(rx_buff->skb, pktlen);
+               rx_buff->skb->dev = ndev;
+               rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
+
+               netif_receive_skb(rx_buff->skb);
+
+               rx_buff->skb = skb;
                dma_unmap_addr_set(rx_buff, addr, addr);
                dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
 
@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
        return work_done;
 }
 
+/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev:      Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       unsigned int miss;
+
+       miss = arc_reg_get(priv, R_MISS);
+       if (miss) {
+               stats->rx_errors += miss;
+               stats->rx_missed_errors += miss;
+               priv->rx_missed_errors += miss;
+       }
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev:      Pointer to the net_device structure.
+ * @budget:    Number of BDs requested to process in one call.
+ * @work_done: Number of BDs processed.
+ *
+ * Under certain conditions the EMAC stops receiving incoming packets and
+ * continuously increments the R_MISS register instead of saving data into
+ * the provided buffer. This function detects that condition and restarts
+ * the EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+                                   int budget, unsigned int work_done)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct arc_emac_bd *rxbd;
+
+       if (work_done)
+               priv->rx_missed_errors = 0;
+
+       if (priv->rx_missed_errors && budget) {
+               rxbd = &priv->rxbd[priv->last_rx_bd];
+               if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+                       arc_emac_restart(ndev);
+                       priv->rx_missed_errors = 0;
+               }
+       }
+}
+
 /**
  * arc_emac_poll - NAPI poll handler.
  * @napi:      Pointer to napi_struct structure.
@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
        unsigned int work_done;
 
        arc_emac_tx_clean(ndev);
+       arc_emac_rx_miss_handle(ndev);
 
        work_done = arc_emac_rx(ndev, budget);
        if (work_done < budget) {
@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
                arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
        }
 
+       arc_emac_rx_stall_check(ndev, budget, work_done);
+
        return work_done;
 }
 
@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
                if (status & MSER_MASK) {
                        stats->rx_missed_errors += 0x100;
                        stats->rx_errors += 0x100;
+                       priv->rx_missed_errors += 0x100;
+                       napi_schedule(&priv->napi);
                }
 
                if (status & RXCR_MASK) {
@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev:      Pointer to net_device structure.
+ *
+ * This function does a hardware reset of the EMAC in order to restore
+ * network packet reception.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+       struct arc_emac_priv *priv = netdev_priv(ndev);
+       struct net_device_stats *stats = &ndev->stats;
+       int i;
+
+       if (net_ratelimit())
+               netdev_warn(ndev, "restarting stalled EMAC\n");
+
+       netif_stop_queue(ndev);
+
+       /* Disable interrupts */
+       arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Disable EMAC */
+       arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+       /* Return the sk_buff to system */
+       arc_free_tx_queue(ndev);
+
+       /* Clean Tx BD's */
+       priv->txbd_curr = 0;
+       priv->txbd_dirty = 0;
+       memset(priv->txbd, 0, TX_RING_SZ);
+
+       for (i = 0; i < RX_BD_NUM; i++) {
+               struct arc_emac_bd *rxbd = &priv->rxbd[i];
+               unsigned int info = le32_to_cpu(rxbd->info);
+
+               if (!(info & FOR_EMAC)) {
+                       stats->rx_errors++;
+                       stats->rx_dropped++;
+               }
+               /* Return ownership to EMAC */
+               rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+       }
+       priv->last_rx_bd = 0;
+
+       /* Make sure info is visible to EMAC before enable */
+       wmb();
+
+       /* Enable interrupts */
+       arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+       /* Enable EMAC */
+       arc_reg_or(priv, R_CTRL, EN_MASK);
+
+       netif_start_queue(ndev);
+}
+
 static const struct net_device_ops arc_emac_netdev_ops = {
        .ndo_open               = arc_emac_open,
        .ndo_stop               = arc_emac_stop,
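
Taken together, the arc_emac hunks build a watchdog for a receiver wedge: R_MISS counts accumulate in priv->rx_missed_errors, any RX progress clears the counter, and if misses persist while the next descriptor is already owned by the EMAC (so a buffer shortage cannot explain them) the MAC is reset. The heuristic in brief:

    if (work_done)
            priv->rx_missed_errors = 0;      /* RX moved, not stalled */

    if (priv->rx_missed_errors && budget) {
            rxbd = &priv->rxbd[priv->last_rx_bd];
            if (le32_to_cpu(rxbd->info) & FOR_EMAC)
                    arc_emac_restart(ndev);  /* wedged despite free BDs */
    }
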
index 4c739d5355d2279a5f1c4cd23e8ccf3288788e8d..8ae269ec17a119b419afc92eeb66dde76d43d473 100644 (file)
@@ -3030,7 +3030,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
 
        del_timer_sync(&bp->timer);
 
-       if (IS_PF(bp)) {
+       if (IS_PF(bp) && !BP_NOMCP(bp)) {
                /* Set ALWAYS_ALIVE bit in shmem */
                bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
                bnx2x_drv_pulse(bp);
@@ -3116,7 +3116,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
        bp->cnic_loaded = false;
 
        /* Clear driver version indication in shmem */
-       if (IS_PF(bp))
+       if (IS_PF(bp) && !BP_NOMCP(bp))
                bnx2x_update_mng_version(bp);
 
        /* Check if there are pending parity attentions. If there are - set
index 91e2a7560b48d572d26e8566c9a3b0667083d45d..ddd5d3ebd20111f667536aac61141b10e5bb2e7c 100644 (file)
@@ -9578,6 +9578,15 @@ static int bnx2x_init_shmem(struct bnx2x *bp)
 
        do {
                bp->common.shmem_base = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
+
+               /* If we read all 0xFFs, means we are in PCI error state and
+                * should bail out to avoid crashes on adapter's FW reads.
+                */
+               if (bp->common.shmem_base == 0xFFFFFFFF) {
+                       bp->flags |= NO_MCP_FLAG;
+                       return -ENODEV;
+               }
+
                if (bp->common.shmem_base) {
                        val = SHMEM_RD(bp, validity_map[BP_PORT(bp)]);
                        if (val & SHR_MEM_VALIDITY_MB)
@@ -14320,7 +14329,10 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                BNX2X_ERR("IO slot reset --> driver unload\n");
 
                /* MCP should have been reset; Need to wait for validity */
-               bnx2x_init_shmem(bp);
+               if (bnx2x_init_shmem(bp)) {
+                       rtnl_unlock();
+                       return PCI_ERS_RESULT_DISCONNECT;
+               }
 
                if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
                        u32 v;
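
The bnx2x shmem guard is the classic PCI surprise-removal test: MMIO reads from a device in error state (or gone from the bus) complete as all ones, so a shmem base of 0xFFFFFFFF means any further firmware access would only crash. Shape of the check:

    val = REG_RD(bp, MISC_REG_SHARED_MEM_ADDR);
    if (val == 0xFFFFFFFF) {         /* all ones: device unreachable */
            bp->flags |= NO_MCP_FLAG;
            return -ENODEV;          /* callers can now bail out cleanly */
    }
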
index 5ee18660bc33a2572320ac3210029e883841b044..c9617675f934b615b05f13e4e74c8d9f7aeeaabb 100644 (file)
@@ -70,7 +70,7 @@ static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
                netdev_err(bp->dev, "vf ndo called though sriov is disabled\n");
                return -EINVAL;
        }
-       if (vf_id >= bp->pf.max_vfs) {
+       if (vf_id >= bp->pf.active_vfs) {
                netdev_err(bp->dev, "Invalid VF id %d\n", vf_id);
                return -EINVAL;
        }
index 3d201d7324bdc7b2c50377e5da5b3ab3acb8a423..d8fee26cd45eaf9691a323c5f5d6bfd8379eb3b8 100644 (file)
@@ -421,7 +421,7 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
        }
 
        /* If all IP and L4 fields are wildcarded then this is an L2 flow */
-       if (is_wildcard(&l3_mask, sizeof(l3_mask)) &&
+       if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
            is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
                flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
        } else {
index de51c2177d03b3cf9e4653226bd8901b1d29834e..8995cfefbfcf1aa1a46f1cbd2f1bdb6722dca7bb 100644 (file)
@@ -4,11 +4,13 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001, 2002, 2003 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2005-2014 Broadcom Corporation.
+ * Copyright (C) 2005-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  *
  * Firmware is:
  *     Derived from proprietary unpublished source code,
- *     Copyright (C) 2000-2003 Broadcom Corporation.
+ *     Copyright (C) 2000-2016 Broadcom Corporation.
+ *     Copyright (C) 2016-2017 Broadcom Ltd.
  *
  *     Permission is hereby granted for the distribution of this firmware
  *     data in hexadecimal or equivalent format, provided this copyright
@@ -10052,6 +10054,16 @@ static int tg3_reset_hw(struct tg3 *tp, bool reset_phy)
 
        tw32(GRC_MODE, tp->grc_mode | val);
 
+       /* On one of the AMD platforms, MRRS is restricted to 4000 because of
+        * a south bridge limitation. As a workaround, the driver sets MRRS
+        * to 2048 instead of the default 4096.
+        */
+       if (tp->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
+           tp->pdev->subsystem_device == TG3PCI_SUBDEVICE_ID_DELL_5762) {
+               val = tr32(TG3PCI_DEV_STATUS_CTRL) & ~MAX_READ_REQ_MASK;
+               tw32(TG3PCI_DEV_STATUS_CTRL, val | MAX_READ_REQ_SIZE_2048);
+       }
+
        /* Setup the timer prescalar register.  Clock is always 66Mhz. */
        val = tr32(GRC_MISC_CFG);
        val &= ~0xff;
@@ -14225,7 +14237,10 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
        /* Reset PHY, otherwise the read DMA engine will be in a mode that
         * breaks all requests to 256 bytes.
         */
-       if (tg3_asic_rev(tp) == ASIC_REV_57766)
+       if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+           tg3_asic_rev(tp) == ASIC_REV_5717 ||
+           tg3_asic_rev(tp) == ASIC_REV_5719 ||
+           tg3_asic_rev(tp) == ASIC_REV_5720)
                reset_phy = true;
 
        err = tg3_restart_hw(tp, reset_phy);
index c2d02d02d1e6f4558365fe53f152e13e045e0277..1f0271fa7c74026edbc68c7880a44287eded7066 100644 (file)
@@ -5,7 +5,8 @@
  * Copyright (C) 2001, 2002, 2003, 2004 David S. Miller (davem@redhat.com)
  * Copyright (C) 2001 Jeff Garzik (jgarzik@pobox.com)
  * Copyright (C) 2004 Sun Microsystems Inc.
- * Copyright (C) 2007-2014 Broadcom Corporation.
+ * Copyright (C) 2007-2016 Broadcom Corporation.
+ * Copyright (C) 2016-2017 Broadcom Limited.
  */
 
 #ifndef _T3_H
@@ -96,6 +97,7 @@
 #define TG3PCI_SUBDEVICE_ID_DELL_JAGUAR                0x0106
 #define TG3PCI_SUBDEVICE_ID_DELL_MERLOT                0x0109
 #define TG3PCI_SUBDEVICE_ID_DELL_SLIM_MERLOT   0x010a
+#define TG3PCI_SUBDEVICE_ID_DELL_5762          0x07f0
 #define TG3PCI_SUBVENDOR_ID_COMPAQ             PCI_VENDOR_ID_COMPAQ
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE     0x007c
 #define TG3PCI_SUBDEVICE_ID_COMPAQ_BANSHEE_2   0x009a
 #define TG3PCI_STD_RING_PROD_IDX       0x00000098 /* 64-bit */
 #define TG3PCI_RCV_RET_RING_CON_IDX    0x000000a0 /* 64-bit */
 /* 0xa8 --> 0xb8 unused */
+#define TG3PCI_DEV_STATUS_CTRL         0x000000b4
+#define  MAX_READ_REQ_SIZE_2048                 0x00004000
+#define  MAX_READ_REQ_MASK              0x00007000
 #define TG3PCI_DUAL_MAC_CTRL           0x000000b8
 #define  DUAL_MAC_CTRL_CH_MASK          0x00000003
 #define  DUAL_MAC_CTRL_ID               0x00000004
index 6f9fa6e3c42a0cc1988e82ae976cce17d45ca0e5..d8424ed16c33777e81c78bab929bda4b5c5aeb29 100644 (file)
@@ -344,7 +344,6 @@ struct adapter_params {
 
        unsigned int sf_size;             /* serial flash size in bytes */
        unsigned int sf_nsec;             /* # of flash sectors */
-       unsigned int sf_fw_start;         /* start of FW image in flash */
 
        unsigned int fw_vers;             /* firmware version */
        unsigned int bs_vers;             /* bootstrap version */
index f63210f155796c9809415d7da54f59086ebdbf7c..375ef86a84da1b08c0a3ff1dd2a6a5380600acb4 100644 (file)
@@ -2844,8 +2844,6 @@ enum {
        SF_RD_DATA_FAST = 0xb,        /* read flash */
        SF_RD_ID        = 0x9f,       /* read ID */
        SF_ERASE_SECTOR = 0xd8,       /* erase sector */
-
-       FW_MAX_SIZE = 16 * SF_SEC_SIZE,
 };
 
 /**
@@ -3558,8 +3556,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
        const __be32 *p = (const __be32 *)fw_data;
        const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data;
        unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec;
-       unsigned int fw_img_start = adap->params.sf_fw_start;
-       unsigned int fw_start_sec = fw_img_start / sf_sec_size;
+       unsigned int fw_start_sec = FLASH_FW_START_SEC;
+       unsigned int fw_size = FLASH_FW_MAX_SIZE;
+       unsigned int fw_start = FLASH_FW_START;
 
        if (!size) {
                dev_err(adap->pdev_dev, "FW image has no data\n");
@@ -3575,9 +3574,9 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
                        "FW image size differs from size in FW header\n");
                return -EINVAL;
        }
-       if (size > FW_MAX_SIZE) {
+       if (size > fw_size) {
                dev_err(adap->pdev_dev, "FW image too large, max is %u bytes\n",
-                       FW_MAX_SIZE);
+                       fw_size);
                return -EFBIG;
        }
        if (!t4_fw_matches_chip(adap, hdr))
@@ -3604,11 +3603,11 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
         */
        memcpy(first_page, fw_data, SF_PAGE_SIZE);
        ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
-       ret = t4_write_flash(adap, fw_img_start, SF_PAGE_SIZE, first_page);
+       ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
        if (ret)
                goto out;
 
-       addr = fw_img_start;
+       addr = fw_start;
        for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
                addr += SF_PAGE_SIZE;
                fw_data += SF_PAGE_SIZE;
@@ -3618,7 +3617,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
        }
 
        ret = t4_write_flash(adap,
-                            fw_img_start + offsetof(struct fw_hdr, fw_ver),
+                            fw_start + offsetof(struct fw_hdr, fw_ver),
                             sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
 out:
        if (ret)
index 6105738552134809fc0d0abf8caadfa76f5e1f8a..a74300a4459c78d7797cff90962a1a7a394dccb5 100644 (file)
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
                for (i = 0; i < txq->bd.ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
                        bdp->cbd_sc = cpu_to_fec16(0);
+                       if (bdp->cbd_bufaddr &&
+                           !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                               dma_unmap_single(&fep->pdev->dev,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
+                                                fec16_to_cpu(bdp->cbd_datlen),
+                                                DMA_TO_DEVICE);
                        if (txq->tx_skbuff[i]) {
                                dev_kfree_skb_any(txq->tx_skbuff[i]);
                                txq->tx_skbuff[i] = NULL;
@@ -3463,6 +3469,10 @@ fec_probe(struct platform_device *pdev)
                        goto failed_regulator;
                }
        } else {
+               if (PTR_ERR(fep->reg_phy) == -EPROBE_DEFER) {
+                       ret = -EPROBE_DEFER;
+                       goto failed_regulator;
+               }
                fep->reg_phy = NULL;
        }
 
@@ -3546,8 +3556,9 @@ failed_clk_ipg:
 failed_clk:
        if (of_phy_is_fixed_link(np))
                of_phy_deregister_fixed_link(np);
-failed_phy:
        of_node_put(phy_node);
+failed_phy:
+       dev_id--;
 failed_ioremap:
        free_netdev(ndev);
 
index 544114281ea754fc9c2221b8aa2f2b8d3ef842f6..9f8d4f8e57e30fa09ca1e8af3aa0d6a017633c1d 100644 (file)
@@ -319,11 +319,10 @@ static int ptp_gianfar_adjtime(struct ptp_clock_info *ptp, s64 delta)
        now = tmr_cnt_read(etsects);
        now += delta;
        tmr_cnt_write(etsects, now);
+       set_fipers(etsects);
 
        spin_unlock_irqrestore(&etsects->lock, flags);
 
-       set_fipers(etsects);
-
        return 0;
 }
 
index d7bdea79e9fa755f8a9c6ad63b0ed1eb1e3c63f5..8fd2458060a088d6475622e4ae68cd40df23c116 100644 (file)
@@ -331,7 +331,8 @@ struct e1000_adapter {
 enum e1000_state_t {
        __E1000_TESTING,
        __E1000_RESETTING,
-       __E1000_DOWN
+       __E1000_DOWN,
+       __E1000_DISABLED
 };
 
 #undef pr_fmt
index 1982f7917a8d5d68776b2e052ac1df809f11c065..3dd4aeb2706d393cd8fbf4998a7582d33b9bafcd 100644 (file)
@@ -945,7 +945,7 @@ static int e1000_init_hw_struct(struct e1000_adapter *adapter,
 static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct net_device *netdev;
-       struct e1000_adapter *adapter;
+       struct e1000_adapter *adapter = NULL;
        struct e1000_hw *hw;
 
        static int cards_found;
@@ -955,6 +955,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        u16 tmp = 0;
        u16 eeprom_apme_mask = E1000_EEPROM_APME;
        int bars, need_ioport;
+       bool disable_dev = false;
 
        /* do not allocate ioport bars when not needed */
        need_ioport = e1000_is_need_ioport(pdev);
@@ -1259,11 +1260,13 @@ err_mdio_ioremap:
        iounmap(hw->ce4100_gbe_mdio_base_virt);
        iounmap(hw->hw_addr);
 err_ioremap:
+       disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
        free_netdev(netdev);
 err_alloc_etherdev:
        pci_release_selected_regions(pdev, bars);
 err_pci_reg:
-       pci_disable_device(pdev);
+       if (!adapter || disable_dev)
+               pci_disable_device(pdev);
        return err;
 }
 
@@ -1281,6 +1284,7 @@ static void e1000_remove(struct pci_dev *pdev)
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct e1000_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
+       bool disable_dev;
 
        e1000_down_and_stop(adapter);
        e1000_release_manageability(adapter);
@@ -1299,9 +1303,11 @@ static void e1000_remove(struct pci_dev *pdev)
                iounmap(hw->flash_address);
        pci_release_selected_regions(pdev, adapter->bars);
 
+       disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
        free_netdev(netdev);
 
-       pci_disable_device(pdev);
+       if (disable_dev)
+               pci_disable_device(pdev);
 }
 
 /**
@@ -5156,7 +5162,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake)
        if (netif_running(netdev))
                e1000_free_irq(adapter);
 
-       pci_disable_device(pdev);
+       if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+               pci_disable_device(pdev);
 
        return 0;
 }
@@ -5200,6 +5207,10 @@ static int e1000_resume(struct pci_dev *pdev)
                pr_err("Cannot enable PCI device from suspend\n");
                return err;
        }
+
+       /* flush memory to make sure state is correct */
+       smp_mb__before_atomic();
+       clear_bit(__E1000_DISABLED, &adapter->flags);
        pci_set_master(pdev);
 
        pci_enable_wake(pdev, PCI_D3hot, 0);
@@ -5274,7 +5285,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
 
        if (netif_running(netdev))
                e1000_down(adapter);
-       pci_disable_device(pdev);
+
+       if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+               pci_disable_device(pdev);
 
        /* Request a slot slot reset. */
        return PCI_ERS_RESULT_NEED_RESET;
@@ -5302,6 +5315,10 @@ static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
                pr_err("Cannot re-enable PCI device after reset.\n");
                return PCI_ERS_RESULT_DISCONNECT;
        }
+
+       /* flush memory to make sure state is correct */
+       smp_mb__before_atomic();
+       clear_bit(__E1000_DISABLED, &adapter->flags);
        pci_set_master(pdev);
 
        pci_enable_wake(pdev, PCI_D3hot, 0);
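
All the e1000 hunks protect one invariant: pci_disable_device() must run at most once per enable, even though the shutdown, remove, error-detected and failed-probe paths can race. test_and_set_bit() on the new __E1000_DISABLED flag lets exactly the first caller do the disable; the resume paths clear the bit again behind a barrier. The idiom:

    /* test_and_set_bit() returns the old value: only the first caller
     * sees 0 and actually disables the device.
     */
    if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
            pci_disable_device(pdev);

    /* on the re-enable side: */
    smp_mb__before_atomic();         /* order prior writes before the clear */
    clear_bit(__E1000_DISABLED, &adapter->flags);
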
index d6d4ed7acf03117232778a82a757ee44d65f765a..31277d3bb7dc1241032695d2d9424779654f4f5f 100644 (file)
@@ -1367,6 +1367,9 @@ out:
  *  Checks to see of the link status of the hardware has changed.  If a
  *  change in link status has been detected, then we read the PHY registers
  *  to get the current speed/duplex if link exists.
+ *
+ *  Returns a negative error code (-E1000_ERR_*), 0 (link down), or 1 (link
+ *  up).
  **/
 static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 {
@@ -1382,7 +1385,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * Change or Rx Sequence Error interrupt.
         */
        if (!mac->get_link_status)
-               return 0;
+               return 1;
 
        /* First we want to see if the MII Status Register reports
         * link.  If so, then we want to get the current speed/duplex
@@ -1613,10 +1616,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
         * different link partner.
         */
        ret_val = e1000e_config_fc_after_link_up(hw);
-       if (ret_val)
+       if (ret_val) {
                e_dbg("Error configuring flow control\n");
+               return ret_val;
+       }
 
-       return ret_val;
+       return 1;
 }
 
 static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
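
Note the changed contract above: the copper link check now reports three
states rather than folding "link up" into 0, returning a negative
-E1000_ERR_* code on failure, 0 for link down, and 1 once link is up with
speed/duplex resolved. A hypothetical caller fragment (the print helpers
stand in for real handling, this is not e1000e API):

        s32 ret = e1000_check_for_copper_link_ich8lan(hw);

        if (ret < 0)
                pr_err("link check failed: %d\n", ret); /* -E1000_ERR_* */
        else if (ret == 1)
                pr_info("link up\n");   /* speed/duplex resolved */
        else
                pr_info("link down\n"); /* ret == 0 */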
index 321d8be80871ce2fc4a5ab39c3189dad2370c988..42dcaefc4c1942777edf6044953ccd1f798cac2a 100644 (file)
@@ -1573,11 +1573,18 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
        else
                netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
+       /* Copy the address first, so that we avoid a possible race with
+        * .set_rx_mode(). If we copy after changing the address in the filter
+        * list, we might open ourselves to a narrow race window where
+        * .set_rx_mode could delete our dev_addr filter and prevent traffic
+        * from passing.
+        */
+       ether_addr_copy(netdev->dev_addr, addr->sa_data);
+
        spin_lock_bh(&vsi->mac_filter_hash_lock);
        i40e_del_mac_filter(vsi, netdev->dev_addr);
        i40e_add_mac_filter(vsi, addr->sa_data);
        spin_unlock_bh(&vsi->mac_filter_hash_lock);
-       ether_addr_copy(netdev->dev_addr, addr->sa_data);
        if (vsi->type == I40E_VSI_MAIN) {
                i40e_status ret;
 
@@ -1923,6 +1930,14 @@ static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_vsi *vsi = np->vsi;
 
+       /* Under some circumstances, we might receive a request to delete
+        * our own device address from our uc list. Because we store the
+        * device address in the VSI's MAC/VLAN filter list, we need to ignore
+        * such requests and not delete our device address from this list.
+        */
+       if (ether_addr_equal(addr, netdev->dev_addr))
+               return 0;
+
        i40e_del_mac_filter(vsi, addr);
 
        return 0;
@@ -6038,8 +6053,8 @@ static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
        /* Set Bit 7 to be valid */
        mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
 
-       /* Set L4type to both TCP and UDP support */
-       mode |= I40E_AQ_SET_SWITCH_L4_TYPE_BOTH;
+       /* Set L4type for TCP support */
+       mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
 
        /* Set cloud filter mode */
        mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
@@ -6969,18 +6984,18 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
             is_valid_ether_addr(filter->src_mac)) ||
            (is_multicast_ether_addr(filter->dst_mac) &&
             is_multicast_ether_addr(filter->src_mac)))
-               return -EINVAL;
+               return -EOPNOTSUPP;
 
-       /* Make sure port is specified, otherwise bail out, for channel
-        * specific cloud filter needs 'L4 port' to be non-zero
+       /* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
+        * ports are not currently supported via big buffer.
         */
-       if (!filter->dst_port)
-               return -EINVAL;
+       if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
+               return -EOPNOTSUPP;
 
        /* adding filter using src_port/src_ip is not supported at this stage */
        if (filter->src_port || filter->src_ipv4 ||
            !ipv6_addr_any(&filter->ip.v6.src_ip6))
-               return -EINVAL;
+               return -EOPNOTSUPP;
 
        /* copy element needed to add cloud filter from filter */
        i40e_set_cld_element(filter, &cld_filter.element);
@@ -6991,7 +7006,7 @@ static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
            is_multicast_ether_addr(filter->src_mac)) {
                /* MAC + IP : unsupported mode */
                if (filter->dst_ipv4)
-                       return -EINVAL;
+                       return -EOPNOTSUPP;
 
                /* since we validated that L4 port must be valid before
                 * we get here, start with respective "flags" value
@@ -7356,7 +7371,7 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
 
        if (tc < 0) {
                dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
-               return -EINVAL;
+               return -EOPNOTSUPP;
        }
 
        if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
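
Two independent fixes run through the i40e_main.c hunks. First,
i40e_set_mac() now publishes the new dev_addr before touching the MAC/VLAN
filter list, so a concurrent .set_rx_mode() walking that list can no longer
unsync the address the netdev is about to adopt, and i40e_addr_unsync()
refuses to drop the device's own address outright. Second, every "cannot
offload this combination" exit in the cloud-filter paths returns -EOPNOTSUPP
instead of -EINVAL, so callers can tell an unsupported request from a
malformed one. A schematic of the reordered set_mac flow, with locking and
hardware writes elided (not the full driver path):

#include <linux/etherdevice.h>

static int sample_set_mac(struct net_device *netdev, const u8 *new_addr)
{
        /* 1: publish the address first, so .set_rx_mode() never sees a
         *    dev_addr that is missing from the filter list
         */
        ether_addr_copy(netdev->dev_addr, new_addr);

        /* 2: only then swap the hardware filters under the hash lock:
         *    del_filter(old_addr); add_filter(new_addr);
         */
        return 0;
}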
index 4566d66ffc7c95a72c4135075a19d5ac8bcd2689..5bc2748ac46860df3a14553d870197d0d67ddda4 100644 (file)
@@ -3047,10 +3047,30 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
        /* Walk through fragments adding latest fragment, testing it, and
         * then removing stale fragments from the sum.
         */
-       stale = &skb_shinfo(skb)->frags[0];
-       for (;;) {
+       for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+               int stale_size = skb_frag_size(stale);
+
                sum += skb_frag_size(frag++);
 
+               /* The stale fragment may present us with a smaller
+                * descriptor than the actual fragment size. To account
+                * for that we need to remove all the data on the front and
+                * figure out what the remainder would be in the last
+                * descriptor associated with the fragment.
+                */
+               if (stale_size > I40E_MAX_DATA_PER_TXD) {
+                       int align_pad = -(stale->page_offset) &
+                                       (I40E_MAX_READ_REQ_SIZE - 1);
+
+                       sum -= align_pad;
+                       stale_size -= align_pad;
+
+                       do {
+                               sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+                               stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+                       } while (stale_size > I40E_MAX_DATA_PER_TXD);
+               }
+
                /* if sum is negative we failed to make sufficient progress */
                if (sum < 0)
                        return true;
@@ -3058,7 +3078,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
                if (!nr_frags--)
                        break;
 
-               sum -= skb_frag_size(stale++);
+               sum -= stale_size;
        }
 
        return false;
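
This loop (and its identical i40evf twin below) fixes the descriptor
budgeting in __i40e_chk_linearize(): a stale fragment larger than one data
descriptor was previously credited back as a single skb_frag_size(),
overstating the progress made. The key idiom is -(offset) & (align - 1),
which for a power-of-two align yields the distance from offset up to the
next aligned boundary. A standalone demonstration; the three constants are
assumptions matching the driver's usual definitions, not values taken from
this diff:

#include <stdio.h>

#define MAX_READ_REQ        4096   /* I40E_MAX_READ_REQ_SIZE (assumed)  */
#define MAX_PER_TXD         16383  /* I40E_MAX_DATA_PER_TXD (assumed)   */
#define MAX_PER_TXD_ALIGNED 12288  /* MAX_PER_TXD & ~(MAX_READ_REQ - 1) */

int main(void)
{
        int page_offset = 1000;    /* fragment's offset within its page */
        int stale_size  = 20000;   /* bigger than one descriptor        */

        if (stale_size > MAX_PER_TXD) {
                /* distance up to the next 4 KB boundary: -1000 & 4095 */
                int align_pad = -page_offset & (MAX_READ_REQ - 1);

                stale_size -= align_pad;            /* 20000 - 3096 = 16904 */
                do {    /* retire one full aligned descriptor per pass */
                        stale_size -= MAX_PER_TXD_ALIGNED;
                } while (stale_size > MAX_PER_TXD); /* 16904 - 12288 = 4616 */
        }

        /* 4616 bytes sit in the last descriptor; the fixed loop credits
         * this back instead of the fragment's full 20000 bytes
         */
        printf("remainder in last descriptor: %d\n", stale_size);
        return 0;
}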
index 50864f99446d3a9f22c83c984eaec4f25caa6015..1ba29bb85b6706c15b5d80d760a7128afd31e0fe 100644 (file)
@@ -2012,10 +2012,30 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
        /* Walk through fragments adding latest fragment, testing it, and
         * then removing stale fragments from the sum.
         */
-       stale = &skb_shinfo(skb)->frags[0];
-       for (;;) {
+       for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+               int stale_size = skb_frag_size(stale);
+
                sum += skb_frag_size(frag++);
 
+               /* The stale fragment may present us with a smaller
+                * descriptor than the actual fragment size. To account
+                * for that we need to remove all the data on the front and
+                * figure out what the remainder would be in the last
+                * descriptor associated with the fragment.
+                */
+               if (stale_size > I40E_MAX_DATA_PER_TXD) {
+                       int align_pad = -(stale->page_offset) &
+                                       (I40E_MAX_READ_REQ_SIZE - 1);
+
+                       sum -= align_pad;
+                       stale_size -= align_pad;
+
+                       do {
+                               sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+                               stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+                       } while (stale_size > I40E_MAX_DATA_PER_TXD);
+               }
+
                /* if sum is negative we failed to make sufficient progress */
                if (sum < 0)
                        return true;
@@ -2023,7 +2043,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb)
                if (!nr_frags--)
                        break;
 
-               sum -= skb_frag_size(stale++);
+               sum -= stale_size;
        }
 
        return false;
index bc93b69cfd1edcf62d11cd24d41a9ca74b8f0dcc..a539263cd79ce4be8fcc0cbfe6bfdd196336cd38 100644 (file)
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
        val &= ~MVNETA_GMAC0_PORT_ENABLE;
        mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);
 
+       pp->link = 0;
+       pp->duplex = -1;
+       pp->speed = 0;
+
        udelay(200);
 }
 
@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 
                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
                    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+                       mvneta_rx_error(pp, rx_desc);
 err_drop_frame:
                        dev->stats.rx_errors++;
-                       mvneta_rx_error(pp, rx_desc);
                        /* leave the descriptor untouched */
                        continue;
                }
@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
        int queue;
 
-       for (queue = 0; queue < txq_number; queue++)
+       for (queue = 0; queue < rxq_number; queue++)
                mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
index 54adfd96785846f9e60a2ded11ab96bc0c196c7e..fc67e35b253e4e59c12227c3e24da9c0f5bae311 100644 (file)
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
        /* set GE2 TUNE */
        regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
 
-       /* GE1, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
-
-       /* GE2, Force 1000M/FD, FC ON */
-       mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+       /* Set link-down as the default for each GMAC. Each MAC's own MCR is
+        * then set up with the appropriate value once mtk_phy_link_adjust()
+        * is invoked.
+        */
+       for (i = 0; i < MTK_MAC_COUNT; i++)
+               mtk_w32(eth, 0, MTK_MAC_MCR(i));
 
        /* Tell the CDM to parse the MTK special tag from the CPU,
         * which also works for untagged packets.
index 1fffdebbc9e8994c70a19f4982f26d1de98be5f2..e9a1fbcc4adfa6e692902b551d0c535bfe019a9a 100644 (file)
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
        case MLX5_CMD_OP_ALLOC_Q_COUNTER:
        case MLX5_CMD_OP_QUERY_Q_COUNTER:
-       case MLX5_CMD_OP_SET_RATE_LIMIT:
+       case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
        case MLX5_CMD_OP_QUERY_RATE_LIMIT:
        case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
        case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
        MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-       MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+       MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
        MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
        MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
index c0872b3284cb405583642d71a0e2e540d4804b6f..543060c305a073c0457cc31ae7318f425a0e7c49 100644 (file)
@@ -82,6 +82,9 @@
        max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
 #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev)       MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
 #define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
+#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
+       (cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
+       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))
 
 #define MLX5_MPWRQ_LOG_WQE_SZ                  18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
@@ -590,6 +593,7 @@ struct mlx5e_channel {
        struct mlx5_core_dev      *mdev;
        struct hwtstamp_config    *tstamp;
        int                        ix;
+       int                        cpu;
 };
 
 struct mlx5e_channels {
@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
 void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
                                 u8 cq_period_mode);
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type);
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params,
+                              u8 rq_type);
 
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
index c6d90b6dd80efa9a1ee82958adf5ef4dd3b4522d..9bcf38f4123b504637c080413078c23304d9e49e 100644 (file)
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
                                    struct ieee_ets *ets)
 {
+       bool have_ets_tc = false;
        int bw_sum = 0;
        int i;
 
@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
        }
 
        /* Validate Bandwidth Sum */
-       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
-               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+       for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+               if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
+                       have_ets_tc = true;
                        bw_sum += ets->tc_tx_bw[i];
+               }
+       }
 
-       if (bw_sum != 0 && bw_sum != 100) {
+       if (have_ets_tc && bw_sum != 100) {
                netdev_err(netdev,
                           "Failed to validate ETS: BW sum is illegal\n");
                return -EINVAL;
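
The have_ets_tc flag closes an acceptance hole: the old check only fired
when the sum was non-zero, so a configuration whose ETS TCs all advertised
0% bandwidth slipped through. Now the "sum must be exactly 100" rule applies
whenever at least one TC uses the ETS algorithm. A few illustrative cases
(hypothetical 8-TC configurations):

/* tc_tsa[] = per-TC algorithm, tc_tx_bw[] = ETS bandwidth share
 *
 *   all TCs strict priority, bw all 0    -> old: accept      new: accept
 *   tc0+tc1 ETS, bw 0+0    (sum 0)       -> old: accept(bug) new: -EINVAL
 *   tc0+tc1 ETS, bw 60+40  (sum 100)     -> old: accept      new: accept
 *   tc0+tc1 ETS, bw 30+40  (sum 70)      -> old: -EINVAL     new: -EINVAL
 */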
index 23425f02840581f6be591bc48cf8cccc8cc26443..8f05efa5c829bccb67ddd8b24dc2997adfe4a6c8 100644 (file)
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
        new_channels.params = priv->channels.params;
        MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 
-       mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
-                                new_channels.params.rq_wq_type);
+       new_channels.params.mpwqe_log_stride_sz =
+               MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
+       new_channels.params.mpwqe_log_num_strides =
+               MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
                priv->channels.params = new_channels.params;
@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
                return err;
 
        mlx5e_switch_priv_channels(priv, &new_channels, NULL);
+       mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
+                 MLX5E_GET_PFLAG(&priv->channels.params,
+                                 MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");
+
        return 0;
 }
 
index d2b057a3e512c1144d741ccffd5bf47b5f138a01..d9d8227f195f0e151ba948e0622ea90a411817c4 100644 (file)
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
        struct mlx5e_cq_param      icosq_cq;
 };
 
-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-       return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
 static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
        return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -83,8 +78,8 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
-void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
-                             struct mlx5e_params *params, u8 rq_type)
+void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
+                              struct mlx5e_params *params, u8 rq_type)
 {
        params->rq_wq_type = rq_type;
        params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                params->log_rq_size = is_kdump_kernel() ?
                        MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
                        MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               params->mpwqe_log_stride_sz =
-                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
-                       MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
-                       MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
+               params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
+                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
                params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
                        params->mpwqe_log_stride_sz;
                break;
@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
                       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
-static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
+static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
+                               struct mlx5e_params *params)
 {
        u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
                    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
                    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                    MLX5_WQ_TYPE_LINKED_LIST;
-       mlx5e_set_rq_type_params(mdev, params, rq_type);
+       mlx5e_init_rq_type_params(mdev, params, rq_type);
 }
 
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
        int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
        int mtt_sz = mlx5e_get_wqe_mtt_sz();
        int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-       int node = mlx5e_get_node(c->priv, c->ix);
        int i;
 
        rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-                                       GFP_KERNEL, node);
+                                     GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->mpwqe.info)
                goto err_out;
 
        /* We allocate more than mtt_sz as we will align the pointer */
-       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-                                       GFP_KERNEL, node);
+       rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                                       cpu_to_node(c->cpu));
        if (unlikely(!rq->mpwqe.mtt_no_align))
                goto err_free_wqe_info;
 
@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        int err;
        int i;
 
-       rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 
        err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                                &rq->wq_ctrl);
@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->wqe.frag_info =
                        kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-                                    GFP_KERNEL,
-                                    mlx5e_get_node(c->priv, c->ix));
+                                    GFP_KERNEL, cpu_to_node(c->cpu));
                if (!rq->wqe.frag_info) {
                        err = -ENOMEM;
                        goto err_rq_wq_destroy;
@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
        sq->min_inline_mode = params->tx_min_inline_mode;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
        sq->channel   = c;
        sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
        if (MLX5_IPSEC_DEV(c->priv->mdev))
                set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 
-       param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+       param->wq.db_numa_node = cpu_to_node(c->cpu);
        err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
        if (err)
                return err;
        sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
 
-       err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+       err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
        if (err)
                goto err_sq_wq_destroy;
 
@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
        struct mlx5_core_dev *mdev = c->priv->mdev;
        int err;
 
-       param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-       param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
+       param->wq.buf_numa_node = cpu_to_node(c->cpu);
+       param->wq.db_numa_node  = cpu_to_node(c->cpu);
        param->eq_ix   = c->ix;
 
        err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
        mlx5e_free_cq(cq);
 }
 
+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+       return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                             struct mlx5e_params *params,
                             struct mlx5e_channel_param *cparam)
@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 {
        struct mlx5e_cq_moder icocq_moder = {0, 0};
        struct net_device *netdev = priv->netdev;
+       int cpu = mlx5e_get_cpu(priv, ix);
        struct mlx5e_channel *c;
        unsigned int irq;
        int err;
        int eqn;
 
-       c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+       c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
        if (!c)
                return -ENOMEM;
 
@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->mdev     = priv->mdev;
        c->tstamp   = &priv->tstamp;
        c->ix       = ix;
+       c->cpu      = cpu;
        c->pdev     = &priv->mdev->pdev->dev;
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
        for (tc = 0; tc < c->num_tc; tc++)
                mlx5e_activate_txqsq(&c->sq[tc]);
        mlx5e_activate_rq(&c->rq);
-       netif_set_xps_queue(c->netdev,
-               mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+       netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
 }
 
 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                                                     struct sk_buff *skb,
                                                     netdev_features_t features)
 {
+       unsigned int offset = 0;
        struct udphdr *udph;
        u8 proto;
        u16 port;
@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                proto = ip_hdr(skb)->protocol;
                break;
        case htons(ETH_P_IPV6):
-               proto = ipv6_hdr(skb)->nexthdr;
+               proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
                break;
        default:
                goto out;
index 60771865c99c9bf4402d042a760887c4497e0036..e7e7cef2bde402be23b191873a5790ed23fd7843 100644 (file)
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
                        break;
                case MLX5_EVENT_TYPE_CQ_ERROR:
                        cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
-                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
+                       mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
                                       cqn, eqe->data.cq_err.syndrome);
                        mlx5_cq_event(dev, cqn, eqe->type);
                        break;
@@ -775,7 +775,7 @@ err1:
        return err;
 }
 
-int mlx5_stop_eqs(struct mlx5_core_dev *dev)
+void mlx5_stop_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
        int err;
@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
        if (MLX5_CAP_GEN(dev, pg)) {
                err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
                if (err)
-                       return err;
+                       mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
+                                     err);
        }
 #endif
 
        err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
        if (err)
-               return err;
+               mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
+                             err);
 
-       mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
+       if (err)
+               mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
+                             err);
        mlx5_cmd_use_polling(dev);
 
        err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
        if (err)
-               mlx5_cmd_use_events(dev);
-
-       return err;
+               mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
+                             err);
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
index 3c11d6e2160abeef5a893b7b81274a7ce315368c..14962969c5ba8c4462662eeb30ef10cbe1c27fa6 100644 (file)
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
        u8 actual_size;
        int err;
 
+       if (!size)
+               return -EINVAL;
+
        if (!fdev->mdev)
                return -ENOTCONN;
 
index c70fd663a63301e7e89ef9ee00d37c7075fe1a0b..dfaad9ecb2b8f155c5cdf30451c572b2d10f1d37 100644 (file)
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
 static void del_sw_flow_table(struct fs_node *node);
 static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
+static void del_sw_prio(struct fs_node *node);
+static void del_sw_ns(struct fs_node *node);
 /* Deleting a rule (destination) is a special case that
  * requires locking the FTE for the whole deletion process.
  */
@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
        return NULL;
 }
 
+static void del_sw_ns(struct fs_node *node)
+{
+       kfree(node);
+}
+
+static void del_sw_prio(struct fs_node *node)
+{
+       kfree(node);
+}
+
 static void del_hw_flow_table(struct fs_node *node)
 {
        struct mlx5_flow_table *ft;
@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
                return ERR_PTR(-ENOMEM);
 
        fs_prio->node.type = FS_TYPE_PRIO;
-       tree_init_node(&fs_prio->node, NULL, NULL);
+       tree_init_node(&fs_prio->node, NULL, del_sw_prio);
        tree_add_node(&fs_prio->node, &ns->node);
        fs_prio->num_levels = num_levels;
        fs_prio->prio = prio;
@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
                return ERR_PTR(-ENOMEM);
 
        fs_init_namespace(ns);
-       tree_init_node(&ns->node, NULL, NULL);
+       tree_init_node(&ns->node, NULL, del_sw_ns);
        tree_add_node(&ns->node, &prio->node);
        list_add_tail(&ns->node.list, &prio->node.children);
 
index 1a0e797ad001ad672c954c228350ff9bcdea125b..21d29f7936f6c5d1e26c6e0d3f10644fd0f096c8 100644 (file)
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
        u32 fw;
        int i;
 
-       /* If the syndrom is 0, the device is OK and no need to print buffer */
+       /* If the syndrome is 0, the device is OK and no need to print buffer */
        if (!ioread8(&h->synd))
                return;
 
index d2a66dc4adc6d2933cfbc60c28cd49c67716a010..8812d7208e8f3522500b3f3e971b4a7341b22c8f 100644 (file)
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
                                   struct mlx5e_params *params)
 {
        /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
-       mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
+       mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
 
        /* RQ size in ipoib by default is 512 */
        params->log_rq_size = is_kdump_kernel() ?
index f26f97fe46666ff7a3f2ce6c496cb8936ee5cbb3..582b2f18010a317600f0238163557077d4c48a39 100644 (file)
@@ -137,6 +137,17 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
+static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
+                                      bool reset, void *out, int out_size)
+{
+       u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
+
+       MLX5_SET(query_cong_statistics_in, in, opcode,
+                MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+       MLX5_SET(query_cong_statistics_in, in, clear, reset);
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
 static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
 {
        return dev->priv.lag;
@@ -633,3 +644,48 @@ bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
        /* If bonded, we do not add an IB device for PF1. */
        return false;
 }
+
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+                                u64 *values,
+                                int num_counters,
+                                size_t *offsets)
+{
+       int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
+       struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+       struct mlx5_lag *ldev;
+       int num_ports;
+       int ret, i, j;
+       void *out;
+
+       out = kvzalloc(outlen, GFP_KERNEL);
+       if (!out)
+               return -ENOMEM;
+
+       memset(values, 0, sizeof(*values) * num_counters);
+
+       mutex_lock(&lag_mutex);
+       ldev = mlx5_lag_dev_get(dev);
+       if (ldev && mlx5_lag_is_bonded(ldev)) {
+               num_ports = MLX5_MAX_PORTS;
+               mdev[0] = ldev->pf[0].dev;
+               mdev[1] = ldev->pf[1].dev;
+       } else {
+               num_ports = 1;
+               mdev[0] = dev;
+       }
+
+       for (i = 0; i < num_ports; ++i) {
+               ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+               if (ret)
+                       goto unlock;
+
+               for (j = 0; j < num_counters; ++j)
+                       values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
+       }
+
+unlock:
+       mutex_unlock(&lag_mutex);
+       kvfree(out);
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
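
The new export sums the congestion counters over both PFs while the LAG is
active, so an upper driver sees bond-wide totals through one call; callers
pass the byte offsets of the counters they want within
query_cong_statistics_out. A hedged usage sketch (the zeroed offsets are
schematic, real callers derive them with MLX5_BYTE_OFF()):

static void sample_dump_cong(struct mlx5_core_dev *mdev)
{
        u64 values[2];
        size_t offsets[2] = { 0, 0 };   /* fill via MLX5_BYTE_OFF(
                                         * query_cong_statistics_out, <fld>) */

        if (!mlx5_lag_query_cong_counters(mdev, values, 2, offsets))
                pr_info("cong counters: %llu %llu\n",
                        (unsigned long long)values[0],
                        (unsigned long long)values[1]);
}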
index 5f323442cc5ac009d5006438d93183e96b85d0d9..8a89c7e8cd631f2e14cb7cbac99a8983964b7eda 100644 (file)
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
 {
        struct mlx5_priv *priv = &dev->priv;
        struct mlx5_eq_table *table = &priv->eq_table;
-       struct irq_affinity irqdesc = {
-               .pre_vectors = MLX5_EQ_VEC_COMP_BASE,
-       };
        int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
        int nvec;
 
@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
        if (!priv->irq_info)
                goto err_free_msix;
 
-       nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
+       nvec = pci_alloc_irq_vectors(dev->pdev,
                        MLX5_EQ_VEC_COMP_BASE + 1, nvec,
-                       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
-                       &irqdesc);
+                       PCI_IRQ_MSIX);
        if (nvec < 0)
                return nvec;
 
@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
        return (u64)timer_l | (u64)timer_h1 << 32;
 }
 
+static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
+               mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
+               return -ENOMEM;
+       }
+
+       cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
+                       priv->irq_info[i].mask);
+
+       if (IS_ENABLED(CONFIG_SMP) &&
+           irq_set_affinity_hint(irq, priv->irq_info[i].mask))
+               mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);
+
+       return 0;
+}
+
+static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
+{
+       struct mlx5_priv *priv  = &mdev->priv;
+       int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);
+
+       irq_set_affinity_hint(irq, NULL);
+       free_cpumask_var(priv->irq_info[i].mask);
+}
+
+static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int err;
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
+               err = mlx5_irq_set_affinity_hint(mdev, i);
+               if (err)
+                       goto err_out;
+       }
+
+       return 0;
+
+err_out:
+       for (i--; i >= 0; i--)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+
+       return err;
+}
+
+static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
+{
+       int i;
+
+       for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
+               mlx5_irq_clear_affinity_hint(mdev, i);
+}
+
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn)
 {
@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_stop_eqs;
        }
 
+       err = mlx5_irq_set_affinity_hints(dev);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
+               goto err_affinity_hints;
+       }
+
        err = mlx5_init_fs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to init flow steering\n");
@@ -1154,6 +1213,9 @@ err_sriov:
        mlx5_cleanup_fs(dev);
 
 err_fs:
+       mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
        free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 
        mlx5_sriov_detach(dev);
        mlx5_cleanup_fs(dev);
+       mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
        mlx5_put_uars_page(dev, priv->uar);
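
Taken together, the main.c hunks move IRQ spreading out of the PCI core
(PCI_IRQ_AFFINITY) and back into the driver: each completion vector gets a
hint mask built with cpumask_local_spread() over the device's NUMA node, and
mlx5e_get_cpu() above then derives a channel's CPU from the first bit of
that mask, so queue memory, XPS and the IRQ all land on the same CPU. A
condensed sketch of the spreading loop; nvec, numa_node, masks[] and irqs[]
are illustrative:

        for (i = 0; i < nvec; i++) {
                /* i-th CPU, preferring the device's NUMA node */
                int cpu = cpumask_local_spread(i, numa_node);

                cpumask_set_cpu(cpu, masks[i]);
                irq_set_affinity_hint(irqs[i], masks[i]); /* hint, not forced */
        }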
index db9e665ab10474f934131b5c2a8fa2173fa5feb6..889130edb71525ecd1f46e88a11b2d3fa0ef843f 100644 (file)
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 err_cmd:
        memset(din, 0, sizeof(din));
        memset(dout, 0, sizeof(dout));
-       MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-       MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+       MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+       MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
        mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
        return err;
 }
index e651e4c02867740d35c07bfcf485860f26ad6409..d3c33e9eea7292412974802c4c38ded8898ed55c 100644 (file)
@@ -125,16 +125,16 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
        return ret_entry;
 }
 
-static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
+static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
                                   u32 rate, u16 index)
 {
-       u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
-       u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
+       u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {0};
+       u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {0};
 
-       MLX5_SET(set_rate_limit_in, in, opcode,
-                MLX5_CMD_OP_SET_RATE_LIMIT);
-       MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
-       MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
+       MLX5_SET(set_pp_rate_limit_in, in, opcode,
+                MLX5_CMD_OP_SET_PP_RATE_LIMIT);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, index);
+       MLX5_SET(set_pp_rate_limit_in, in, rate_limit, rate);
        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -173,7 +173,7 @@ int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index)
                entry->refcount++;
        } else {
                /* new rate limit */
-               err = mlx5_set_rate_limit_cmd(dev, rate, entry->index);
+               err = mlx5_set_pp_rate_limit_cmd(dev, rate, entry->index);
                if (err) {
                        mlx5_core_err(dev, "Failed configuring rate: %u (%d)\n",
                                      rate, err);
@@ -209,7 +209,7 @@ void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate)
        entry->refcount--;
        if (!entry->refcount) {
                /* need to remove rate */
-               mlx5_set_rate_limit_cmd(dev, 0, entry->index);
+               mlx5_set_pp_rate_limit_cmd(dev, 0, entry->index);
                entry->rate = 0;
        }
 
@@ -262,8 +262,8 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev)
        /* Clear all configured rates */
        for (i = 0; i < table->max_size; i++)
                if (table->rl_entry[i].rate)
-                       mlx5_set_rate_limit_cmd(dev, 0,
-                                               table->rl_entry[i].index);
+                       mlx5_set_pp_rate_limit_cmd(dev, 0,
+                                                  table->rl_entry[i].index);
 
        kfree(dev->priv.rl_table.rl_entry);
 }
index 07a9ba6cfc70a11f7b4c05c73c1b32a704b7e6ba..2f74953e4561511e23d8fe3219db89104e3dd9e3 100644 (file)
@@ -71,9 +71,9 @@ struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
        struct mlx5e_vxlan *vxlan;
 
-       spin_lock(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        vxlan = radix_tree_lookup(&vxlan_db->tree, port);
-       spin_unlock(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
 
        return vxlan;
 }
@@ -88,8 +88,12 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
        struct mlx5e_vxlan *vxlan;
        int err;
 
-       if (mlx5e_vxlan_lookup_port(priv, port))
+       mutex_lock(&priv->state_lock);
+       vxlan = mlx5e_vxlan_lookup_port(priv, port);
+       if (vxlan) {
+               atomic_inc(&vxlan->refcount);
                goto free_work;
+       }
 
        if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port))
                goto free_work;
@@ -99,10 +103,11 @@ static void mlx5e_vxlan_add_port(struct work_struct *work)
                goto err_delete_port;
 
        vxlan->udp_port = port;
+       atomic_set(&vxlan->refcount, 1);
 
-       spin_lock_irq(&vxlan_db->lock);
+       spin_lock_bh(&vxlan_db->lock);
        err = radix_tree_insert(&vxlan_db->tree, vxlan->udp_port, vxlan);
-       spin_unlock_irq(&vxlan_db->lock);
+       spin_unlock_bh(&vxlan_db->lock);
        if (err)
                goto err_free;
 
@@ -113,35 +118,39 @@ err_free:
 err_delete_port:
        mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
 free_work:
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
-static void __mlx5e_vxlan_core_del_port(struct mlx5e_priv *priv, u16 port)
+static void mlx5e_vxlan_del_port(struct work_struct *work)
 {
+       struct mlx5e_vxlan_work *vxlan_work =
+               container_of(work, struct mlx5e_vxlan_work, work);
+       struct mlx5e_priv *priv         = vxlan_work->priv;
        struct mlx5e_vxlan_db *vxlan_db = &priv->vxlan;
+       u16 port = vxlan_work->port;
        struct mlx5e_vxlan *vxlan;
+       bool remove = false;
 
-       spin_lock_irq(&vxlan_db->lock);
-       vxlan = radix_tree_delete(&vxlan_db->tree, port);
-       spin_unlock_irq(&vxlan_db->lock);
-
+       mutex_lock(&priv->state_lock);
+       spin_lock_bh(&vxlan_db->lock);
+       vxlan = radix_tree_lookup(&vxlan_db->tree, port);
        if (!vxlan)
-               return;
-
-       mlx5e_vxlan_core_del_port_cmd(priv->mdev, vxlan->udp_port);
-
-       kfree(vxlan);
-}
+               goto out_unlock;
 
-static void mlx5e_vxlan_del_port(struct work_struct *work)
-{
-       struct mlx5e_vxlan_work *vxlan_work =
-               container_of(work, struct mlx5e_vxlan_work, work);
-       struct mlx5e_priv *priv = vxlan_work->priv;
-       u16 port = vxlan_work->port;
+       if (atomic_dec_and_test(&vxlan->refcount)) {
+               radix_tree_delete(&vxlan_db->tree, port);
+               remove = true;
+       }
 
-       __mlx5e_vxlan_core_del_port(priv, port);
+out_unlock:
+       spin_unlock_bh(&vxlan_db->lock);
 
+       if (remove) {
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
+       }
+       mutex_unlock(&priv->state_lock);
        kfree(vxlan_work);
 }
 
@@ -171,12 +180,11 @@ void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv)
        struct mlx5e_vxlan *vxlan;
        unsigned int port = 0;
 
-       spin_lock_irq(&vxlan_db->lock);
+       /* Lockless since we are the only radix-tree consumer; the wq is disabled */
        while (radix_tree_gang_lookup(&vxlan_db->tree, (void **)&vxlan, port, 1)) {
                port = vxlan->udp_port;
-               spin_unlock_irq(&vxlan_db->lock);
-               __mlx5e_vxlan_core_del_port(priv, (u16)port);
-               spin_lock_irq(&vxlan_db->lock);
+               radix_tree_delete(&vxlan_db->tree, port);
+               mlx5e_vxlan_core_del_port_cmd(priv->mdev, port);
+               kfree(vxlan);
        }
-       spin_unlock_irq(&vxlan_db->lock);
 }
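
The vxlan.c rewrite makes the offloaded-port database reference counted:
re-adding a known UDP port just bumps its refcount, only the final delete
removes the radix-tree entry and issues the firmware command,
priv->state_lock serializes the add/del workers, and the tree spinlock drops
from _irq to _bh flavors since no user runs in hard-IRQ context. The pattern
boiled down; lookup/insert/erase/alloc_entry stand in for the radix-tree and
allocation calls and are not driver API:

static void port_add(struct db *db, u16 port)
{
        struct entry *e = lookup(db, port);

        if (e) {                        /* already offloaded: share it */
                atomic_inc(&e->refcount);
                return;
        }
        e = alloc_entry(port);          /* plus the firmware add command */
        atomic_set(&e->refcount, 1);
        insert(db, port, e);
}

static void port_del(struct db *db, u16 port)
{
        struct entry *e = lookup(db, port);

        if (e && atomic_dec_and_test(&e->refcount)) {
                erase(db, port);        /* plus the firmware del command */
                kfree(e);
        }
}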
index 5def12c048e38992e7edd9f870233e369ef4580e..5ef6ae7d568abcd1410bc403b526628a634799cb 100644 (file)
@@ -36,6 +36,7 @@
 #include "en.h"
 
 struct mlx5e_vxlan {
+       atomic_t refcount;
        u16 udp_port;
 };
 
index 23f7d828cf676244766e582674a2946ea467bb6b..6ef20e5cc77dd314a60c9b1330fb421f81ceda7a 100644 (file)
@@ -1643,7 +1643,12 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
                return 0;
        }
 
-       wmb(); /* reset needs to be written before we read control register */
+       /* The reset needs to be written before we read the control register,
+        * and we must wait for the HW to become responsive once again.
+        */
+       wmb();
+       msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS);
+
        end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
        do {
                u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY);
index a6441208e9d96e0b96de0cde155f50fa437f56ae..fb082ad21b00e43435003e186eae92ff3d197a66 100644 (file)
@@ -59,6 +59,7 @@
 #define MLXSW_PCI_SW_RESET                     0xF0010
 #define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
 #define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       5000
+#define MLXSW_PCI_SW_RESET_WAIT_MSECS          100
 #define MLXSW_PCI_FW_READY                     0xA1844
 #define MLXSW_PCI_FW_READY_MASK                        0xFFFF
 #define MLXSW_PCI_FW_READY_MAGIC               0x5E
index 9bd8d28de1522906b92021a8cf2c476b79c35a56..c3837ca7a705bd4d6ed48c1877d0f0f0de984c85 100644 (file)
@@ -4376,7 +4376,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                }
                if (!info->linking)
                        break;
-               if (netdev_has_any_upper_dev(upper_dev)) {
+               if (netdev_has_any_upper_dev(upper_dev) &&
+                   (!netif_is_bridge_master(upper_dev) ||
+                    !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+                                                         upper_dev))) {
                        NL_SET_ERR_MSG(extack,
                                       "spectrum: Enslaving a port to a device that already has an upper device is not supported");
                        return -EINVAL;
@@ -4504,6 +4507,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                                              u16 vid)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct netdev_notifier_changeupper_info *info = ptr;
        struct netlink_ext_ack *extack;
        struct net_device *upper_dev;
@@ -4520,7 +4524,10 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                }
                if (!info->linking)
                        break;
-               if (netdev_has_any_upper_dev(upper_dev)) {
+               if (netdev_has_any_upper_dev(upper_dev) &&
+                   (!netif_is_bridge_master(upper_dev) ||
+                    !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
+                                                         upper_dev))) {
                        NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
                        return -EINVAL;
                }
index 432ab9b12b7f59ded586a663f0a686741740463d..05ce1befd9b378b41584c149abd6a0d53c4be246 100644 (file)
@@ -365,6 +365,8 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
 void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
                                struct net_device *brport_dev,
                                struct net_device *br_dev);
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+                                        const struct net_device *br_dev);
 
 /* spectrum.c */
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
index c33beac5def0615ca24a7ade5f8fdd6b2651647c..b5397da94d7f5b178da560145ee8def18825df6e 100644 (file)
@@ -46,7 +46,8 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
                                  int tclass_num, u32 min, u32 max,
                                  u32 probability, bool is_ecn)
 {
-       char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)];
+       char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
+       char cwtp_cmd[MLXSW_REG_CWTP_LEN];
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        int err;
 
@@ -60,10 +61,10 @@ mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
        if (err)
                return err;
 
-       mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num,
+       mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
                             MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
 
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
 }
 
 static int
index 72ef4f8025f00ff8810c2955b25b7f3baec49be1..434b3922b34f06689e3e85c0dc70810902c51275 100644 (file)
@@ -2436,25 +2436,16 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
        rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
 }
 
-static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
-                                   const struct mlxsw_sp_rif *rif)
-{
-       char rauht_pl[MLXSW_REG_RAUHT_LEN];
-
-       mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
-                            rif->rif_index, rif->addr);
-       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
-}
-
 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
                                         struct mlxsw_sp_rif *rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
 
-       mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif);
        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
-                                rif_list_node)
+                                rif_list_node) {
+               mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
+       }
 }
 
 enum mlxsw_sp_nexthop_type {
@@ -3237,7 +3228,7 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
 {
        if (!removing)
                nh->should_offload = 1;
-       else if (nh->offloaded)
+       else
                nh->should_offload = 0;
        nh->update = 1;
 }
index 7b8548e25ae73da7e835c7a8da1eaaece8cbbcfb..593ad31be7490d6af8abf3f5c3929de925c0ca77 100644 (file)
@@ -152,6 +152,12 @@ mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
        return NULL;
 }
 
+bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
+                                        const struct net_device *br_dev)
+{
+       return !!mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+}
+
 static struct mlxsw_sp_bridge_device *
 mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
                              struct net_device *br_dev)
index e379b78e86efa7c02dca2bc95afc1f79afc7800a..13190aa09faf748c16e1f00f7aee9097442aef85 100644 (file)
@@ -82,10 +82,33 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn)
        return nfp_net_ebpf_capable(nn) ? "BPF" : "";
 }
 
+static int
+nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
+{
+       int err;
+
+       nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
+       if (!nn->app_priv)
+               return -ENOMEM;
+
+       err = nfp_app_nic_vnic_alloc(app, nn, id);
+       if (err)
+               goto err_free_priv;
+
+       return 0;
+err_free_priv:
+       kfree(nn->app_priv);
+       return err;
+}
+
 static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
 {
+       struct nfp_bpf_vnic *bv = nn->app_priv;
+
        if (nn->dp.bpf_offload_xdp)
                nfp_bpf_xdp_offload(app, nn, NULL);
+       WARN_ON(bv->tc_prog);
+       kfree(bv);
 }
 
 static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
@@ -93,6 +116,9 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
 {
        struct tc_cls_bpf_offload *cls_bpf = type_data;
        struct nfp_net *nn = cb_priv;
+       struct bpf_prog *oldprog;
+       struct nfp_bpf_vnic *bv;
+       int err;
 
        if (type != TC_SETUP_CLSBPF ||
            !tc_can_offload(nn->dp.netdev) ||
@@ -100,8 +126,6 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
            cls_bpf->common.protocol != htons(ETH_P_ALL) ||
            cls_bpf->common.chain_index)
                return -EOPNOTSUPP;
-       if (nn->dp.bpf_offload_xdp)
-               return -EBUSY;
 
        /* Only support TC direct action */
        if (!cls_bpf->exts_integrated ||
@@ -110,16 +134,25 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
                return -EOPNOTSUPP;
        }
 
-       switch (cls_bpf->command) {
-       case TC_CLSBPF_REPLACE:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
-       case TC_CLSBPF_ADD:
-               return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
-       case TC_CLSBPF_DESTROY:
-               return nfp_net_bpf_offload(nn, NULL, true);
-       default:
+       if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
                return -EOPNOTSUPP;
+
+       bv = nn->app_priv;
+       oldprog = cls_bpf->oldprog;
+
+       /* Don't remove if oldprog doesn't match driver's state */
+       if (bv->tc_prog != oldprog) {
+               oldprog = NULL;
+               if (!cls_bpf->prog)
+                       return 0;
        }
+
+       err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+       if (err)
+               return err;
+
+       bv->tc_prog = cls_bpf->prog;
+       return 0;
 }
 
 static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@ -167,7 +200,7 @@ const struct nfp_app_type app_bpf = {
 
        .extra_cap      = nfp_bpf_extra_cap,
 
-       .vnic_alloc     = nfp_app_nic_vnic_alloc,
+       .vnic_alloc     = nfp_bpf_vnic_alloc,
        .vnic_free      = nfp_bpf_vnic_free,
 
        .setup_tc       = nfp_bpf_setup_tc,
index 082a15f6dfb5b9ba806316bd4f272006c93749fb..57b6043177a3891c49096ab85906ee539b3d5ecb 100644 (file)
@@ -172,6 +172,14 @@ struct nfp_prog {
        struct list_head insns;
 };
 
+/**
+ * struct nfp_bpf_vnic - per-vNIC BPF priv structure
+ * @tc_prog:   currently loaded cls_bpf program
+ */
+struct nfp_bpf_vnic {
+       struct bpf_prog *tc_prog;
+};
+
 int nfp_bpf_jit(struct nfp_prog *prog);
 
 extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
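
The nfp changes adapt to the consolidated TC_CLSBPF_OFFLOAD command, in
which the (oldprog, prog) pair encodes all three operations, and add a
per-vNIC record (struct nfp_bpf_vnic) of what is actually loaded so a stale
oldprog can be ignored. In table form:

/* (cls_bpf->oldprog, cls_bpf->prog) under TC_CLSBPF_OFFLOAD:
 *
 *   (NULL, p)    add:     offload p
 *   (p1,   p2)   replace: atomically swap p1 for p2
 *   (p,    NULL) destroy: remove p
 *
 * If oldprog does not match bv->tc_prog, the driver's state disagrees with
 * the requester's; oldprog is dropped, and if there is no new prog either,
 * the callback returns 0 without touching the hardware.
 */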
index 1a603fdd9e802d1344e37bcfa719404b69312e43..99b0487b6d820b53befa8a5904a17d5a9176081a 100644 (file)
@@ -568,6 +568,7 @@ nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
                return err;
        }
        nn_writeb(nn, ctrl_offset, entry->entry);
+       nfp_net_irq_unmask(nn, entry->entry);
 
        return 0;
 }
@@ -582,6 +583,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
                                 unsigned int vector_idx)
 {
        nn_writeb(nn, ctrl_offset, 0xff);
+       nn_pci_flush(nn);
        free_irq(nn->irq_entries[vector_idx].vector, nn);
 }
 
index 70c92b649b299a1a16195e144e3da77cff25855c..38c924bdd32e46f3586eac77d5a63c361993fd09 100644 (file)
@@ -253,18 +253,18 @@ static int emac_open(struct net_device *netdev)
                return ret;
        }
 
-       ret = emac_mac_up(adpt);
+       ret = adpt->phy.open(adpt);
        if (ret) {
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
                return ret;
        }
 
-       ret = adpt->phy.open(adpt);
+       ret = emac_mac_up(adpt);
        if (ret) {
-               emac_mac_down(adpt);
                emac_mac_rx_tx_rings_free_all(adpt);
                free_irq(irq->irq, irq);
+               adpt->phy.close(adpt);
                return ret;
        }
 
index 75323000c3646bc781c12287367f8b455ada5a6a..b9e2846589f8678e72683c230d601d7f517de5da 100644 (file)
@@ -147,7 +147,7 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
        [FWNLCR0]       = 0x0090,
        [FWALCR0]       = 0x0094,
        [TXNLCR1]       = 0x00a0,
-       [TXALCR1]       = 0x00a0,
+       [TXALCR1]       = 0x00a4,
        [RXNLCR1]       = 0x00a8,
        [RXALCR1]       = 0x00ac,
        [FWNLCR1]       = 0x00b0,
@@ -399,7 +399,7 @@ static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
        [FWNLCR0]       = 0x0090,
        [FWALCR0]       = 0x0094,
        [TXNLCR1]       = 0x00a0,
-       [TXALCR1]       = 0x00a0,
+       [TXALCR1]       = 0x00a4,
        [RXNLCR1]       = 0x00a8,
        [RXALCR1]       = 0x00ac,
        [FWNLCR1]       = 0x00b0,
@@ -3225,18 +3225,37 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
        /* ioremap the TSU registers */
        if (mdp->cd->tsu) {
                struct resource *rtsu;
+
                rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-               mdp->tsu_addr = devm_ioremap_resource(&pdev->dev, rtsu);
-               if (IS_ERR(mdp->tsu_addr)) {
-                       ret = PTR_ERR(mdp->tsu_addr);
+               if (!rtsu) {
+                       dev_err(&pdev->dev, "no TSU resource\n");
+                       ret = -ENODEV;
+                       goto out_release;
+               }
+               /* Only the first of the two ports sharing this TSU can
+                * request the TSU region; a second request would make
+                * the probe fail...
+                */
+               if (devno % 2 == 0 &&
+                   !devm_request_mem_region(&pdev->dev, rtsu->start,
+                                            resource_size(rtsu),
+                                            dev_name(&pdev->dev))) {
+                       dev_err(&pdev->dev, "can't request TSU resource.\n");
+                       ret = -EBUSY;
+                       goto out_release;
+               }
+               mdp->tsu_addr = devm_ioremap(&pdev->dev, rtsu->start,
+                                            resource_size(rtsu));
+               if (!mdp->tsu_addr) {
+                       dev_err(&pdev->dev, "TSU region ioremap() failed.\n");
+                       ret = -ENOMEM;
                        goto out_release;
                }
                mdp->port = devno % 2;
                ndev->features = NETIF_F_HW_VLAN_CTAG_FILTER;
        }
 
-       /* initialize first or needed device */
-       if (!devno || pd->needs_init) {
+       /* Need to init only the first port of the two sharing a TSU */
+       if (devno % 2 == 0) {
                if (mdp->cd->chip_reset)
                        mdp->cd->chip_reset(ndev);
 
index e1e5ac0537606f2192d553c85795428b18fd615d..ce2ea2d491acac195eefe3f01f8ec9df08d3e77c 100644 (file)
@@ -409,7 +409,7 @@ struct stmmac_desc_ops {
        /* get timestamp value */
         u64(*get_timestamp) (void *desc, u32 ats);
        /* get rx timestamp status */
-       int (*get_rx_timestamp_status) (void *desc, u32 ats);
+       int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
        /* Display ring */
        void (*display_ring)(void *head, unsigned int size, bool rx);
        /* set MSS via context descriptor */
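
For orientation, the widened callback contract introduced here: dwmac4 keeps the RX timestamp status in a following context descriptor and therefore inspects next_desc, while the enhanced and normal descriptor variants ignore it. As a sketch:

	/* desc:      descriptor whose timestamp status is queried
	 * next_desc: the following (context) descriptor; only dwmac4 uses it
	 * ats:       advanced timestamp support flag
	 * return:    non-zero when a valid RX timestamp is available
	 */
	typedef int (*get_rx_timestamp_status_fn)(void *desc, void *next_desc,
						  u32 ats);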
index 4b286e27c4ca5cdbbb7c457e31bef1b2e9e7bd94..7e089bf906b4f316034403f9a44fbfd191ee09eb 100644 (file)
@@ -258,7 +258,8 @@ static int dwmac4_rx_check_timestamp(void *desc)
        return ret;
 }
 
-static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, void *next_desc,
+                                                u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
        int ret = -EINVAL;
@@ -270,7 +271,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
                        /* Check if timestamp is OK from context descriptor */
                        do {
-                               ret = dwmac4_rx_check_timestamp(desc);
+                               ret = dwmac4_rx_check_timestamp(next_desc);
                                if (ret < 0)
                                        goto exit;
                                i++;
index 7546b3664113a3d776fe19094df71b2adfb99e98..2a828a31281423082995bc332ec51a3f20989804 100644 (file)
@@ -400,7 +400,8 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
+static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
+                                           u32 ats)
 {
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
index f817f8f365696d3388e73f85710d30dde43a7d41..db4cee57bb2465eb98fe38cb947624e779da4673 100644 (file)
@@ -265,7 +265,7 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
        return ns;
 }
 
-static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
+static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
 
index 721b616552611aa74ea077e744ec9a0c4836a48f..08c19ebd530674972ceb9ebcb41cd7af4b3fb58d 100644 (file)
@@ -34,6 +34,7 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
+       u32 reg_value;
 
        /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
         *      formula = (1/ptp_clock) * 1000000000
@@ -50,10 +51,11 @@ static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
 
        data &= PTP_SSIR_SSINC_MASK;
 
+       reg_value = data;
        if (gmac4)
-               data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+               reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;
 
-       writel(data, ioaddr + PTP_SSIR);
+       writel(reg_value, ioaddr + PTP_SSIR);
 
        return data;
 }
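
A worked instance of the fix above, assuming the formula quoted in the hunk (data = 10^9 / ptp_clock) and a 50 MHz PTP clock: data = 20 ns; on GMAC4 only the value written to PTP_SSIR is shifted into the SSINC field, while the raw 20 is still returned for the addend math. In sketch form:

	static u32 ssir_reg_value(u32 data, bool gmac4)
	{
		/* e.g. data = 1000000000 / 50000000 = 20 (ns) */
		u32 reg_value = data;

		if (gmac4)	/* SSINC field sits above bit 16 on GMAC4 */
			reg_value <<= GMAC4_PTP_SSIR_SSINC_SHIFT;

		return reg_value;	/* write this; keep raw data */
	}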
index d7250539d0bd0c61c92fc9460c9e1197bb57ac8f..c0af0bc4e714891080e966c7d6ddd5e47e8ab5a5 100644 (file)
@@ -364,9 +364,15 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
        struct net_device *ndev = priv->dev;
+       int interface = priv->plat->interface;
        unsigned long flags;
        bool ret = false;
 
+       if ((interface != PHY_INTERFACE_MODE_MII) &&
+           (interface != PHY_INTERFACE_MODE_GMII) &&
+           !phy_interface_mode_is_rgmii(interface))
+               goto out;
+
        /* Using PCS we cannot deal with the phy registers at this stage
         * so we do not support extra features like EEE.
         */
@@ -482,7 +488,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
                desc = np;
 
        /* Check if timestamp is available */
-       if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
+       if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
                ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
index b718a02a6bb6055d7841619b42cb400d1eea0984..0a48b3073d3d3614483e9d8ae64f073a5b3d2ead 100644 (file)
@@ -825,6 +825,13 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        if (IS_ERR(rt))
                return PTR_ERR(rt);
 
+       if (skb_dst(skb)) {
+               int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) -
+                         GENEVE_BASE_HLEN - info->options_len - 14;
+
+               skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+       }
+
        sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
        if (geneve->collect_md) {
                tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
@@ -864,6 +871,13 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        if (IS_ERR(dst))
                return PTR_ERR(dst);
 
+       if (skb_dst(skb)) {
+               int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) -
+                         GENEVE_BASE_HLEN - info->options_len - 14;
+
+               skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+       }
+
        sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
        if (geneve->collect_md) {
                prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
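
Both hunks above apply the same PMTU feedback: the inner route learns an MTU equal to the outer path MTU minus the full encapsulation overhead (outer IP header, GENEVE base header, options, and the 14-byte inner Ethernet header). A condensed sketch of the IPv4 case, writing ETH_HLEN for the literal 14 and assuming GENEVE_BASE_HLEN from this driver:

	static void geneve_lower_inner_pmtu(struct sk_buff *skb,
					    struct dst_entry *outer,
					    int options_len)
	{
		struct dst_entry *inner = skb_dst(skb);
		int mtu;

		if (!inner)
			return;

		mtu = dst_mtu(outer) - sizeof(struct iphdr) -
		      GENEVE_BASE_HLEN - options_len - ETH_HLEN;

		inner->ops->update_pmtu(inner, NULL, skb, mtu);
	}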
index a178c5efd33e5486d47608dd7e4d7218b8eda4a1..a0f2be81d52e4a4fd24585d8957b181a3cf9a103 100644 (file)
@@ -1444,9 +1444,14 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
        return 0;
 
 unregister_netdev:
+       /* macvlan_uninit would free the macvlan port */
        unregister_netdevice(dev);
+       return err;
 destroy_macvlan_port:
-       if (create)
+       /* The macvlan port may already have been freed by macvlan_uninit
+        * when registration fails, so only destroy the port when it is
+        * still valid.
+        */
+       if (create && macvlan_port_get_rtnl(dev))
                macvlan_port_destroy(port->dev);
        return err;
 }
index b5a8f750e4337ce04fc46b3cf784cf84a2c42c77..82104edca393b9b6662a18ef8ea0bdd8d3bb057d 100644 (file)
@@ -879,6 +879,8 @@ static int m88e1510_config_init(struct phy_device *phydev)
 
        /* SGMII-to-Copper mode initialization */
        if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+               u32 pause;
+
                /* Select page 18 */
                err = marvell_set_page(phydev, 18);
                if (err < 0)
@@ -902,6 +904,16 @@ static int m88e1510_config_init(struct phy_device *phydev)
                err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
                if (err < 0)
                        return err;
+
+               /* There appears to be a bug in the 88e1512 when used in
+                * SGMII to copper mode, where the AN advertisement register
+                * clears the pause bits each time a negotiation occurs.
+                * This means we can never be truly sure what was advertised,
+                * so disable Pause support.
+                */
+               pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+               phydev->supported &= ~pause;
+               phydev->advertising &= ~pause;
        }
 
        return m88e1121_config_init(phydev);
@@ -2073,7 +2085,7 @@ static struct phy_driver marvell_drivers[] = {
                .flags = PHY_HAS_INTERRUPT,
                .probe = marvell_probe,
                .config_init = &m88e1145_config_init,
-               .config_aneg = &marvell_config_aneg,
+               .config_aneg = &m88e1101_config_aneg,
                .read_status = &genphy_read_status,
                .ack_interrupt = &marvell_ack_interrupt,
                .config_intr = &marvell_config_intr,
index 135296508a7ed70661f7e8f9aa9c0eb7fadf3133..6425ce04d3f95d6ca7a7b2ec7a22b7f860358a1e 100644 (file)
@@ -118,8 +118,10 @@ static int sun4i_mdio_probe(struct platform_device *pdev)
 
        data->regulator = devm_regulator_get(&pdev->dev, "phy");
        if (IS_ERR(data->regulator)) {
-               if (PTR_ERR(data->regulator) == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
+               if (PTR_ERR(data->regulator) == -EPROBE_DEFER) {
+                       ret = -EPROBE_DEFER;
+                       goto err_out_free_mdiobus;
+               }
 
                dev_info(&pdev->dev, "no regulator found\n");
                data->regulator = NULL;
index bfd3090fb055bac4c40924205036119da5b6ce61..07c6048200c6164ac77a649468063e2fedd404c6 100644 (file)
@@ -194,8 +194,11 @@ static int xgene_mdio_reset(struct xgene_mdio_pdata *pdata)
        }
 
        ret = xgene_enet_ecc_init(pdata);
-       if (ret)
+       if (ret) {
+               if (pdata->dev->of_node)
+                       clk_disable_unprepare(pdata->clk);
                return ret;
+       }
        xgene_gmac_reset(pdata);
 
        return 0;
@@ -388,8 +391,10 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                return ret;
 
        mdio_bus = mdiobus_alloc();
-       if (!mdio_bus)
-               return -ENOMEM;
+       if (!mdio_bus) {
+               ret = -ENOMEM;
+               goto out_clk;
+       }
 
        mdio_bus->name = "APM X-Gene MDIO bus";
 
@@ -418,7 +423,7 @@ static int xgene_mdio_probe(struct platform_device *pdev)
                mdio_bus->phy_mask = ~0;
                ret = mdiobus_register(mdio_bus);
                if (ret)
-                       goto out;
+                       goto out_mdiobus;
 
                acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_HANDLE(dev), 1,
                                    acpi_register_phy, NULL, mdio_bus, NULL);
@@ -426,16 +431,20 @@ static int xgene_mdio_probe(struct platform_device *pdev)
        }
 
        if (ret)
-               goto out;
+               goto out_mdiobus;
 
        pdata->mdio_bus = mdio_bus;
        xgene_mdio_status = true;
 
        return 0;
 
-out:
+out_mdiobus:
        mdiobus_free(mdio_bus);
 
+out_clk:
+       if (dev->of_node)
+               clk_disable_unprepare(pdata->clk);
+
        return ret;
 }
 
index ab4614113403455c1eee1c2ad69c7cebc6da5c9d..422ff6333c52da8c4a212123bdaf443945613e02 100644 (file)
@@ -624,6 +624,7 @@ static int ksz9031_read_status(struct phy_device *phydev)
                phydev->link = 0;
                if (phydev->drv->config_intr && phy_interrupt_is_valid(phydev))
                        phydev->drv->config_intr(phydev);
+               return genphy_config_aneg(phydev);
        }
 
        return 0;
index 5dc9668dde34fe6b810c48f2bcd63e8609caa74e..249ce5cbea2201902563ceb6b9339809e8ccb99e 100644 (file)
@@ -526,6 +526,7 @@ struct phylink *phylink_create(struct net_device *ndev, struct device_node *np,
        pl->link_config.pause = MLO_PAUSE_AN;
        pl->link_config.speed = SPEED_UNKNOWN;
        pl->link_config.duplex = DUPLEX_UNKNOWN;
+       pl->link_config.an_enabled = true;
        pl->ops = ops;
        __set_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
 
@@ -951,6 +952,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
        mutex_lock(&pl->state_mutex);
        /* Configure the MAC to match the new settings */
        linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
+       pl->link_config.interface = config.interface;
        pl->link_config.speed = our_kset.base.speed;
        pl->link_config.duplex = our_kset.base.duplex;
        pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
@@ -1294,6 +1296,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
                switch (cmd) {
                case SIOCGMIIPHY:
                        mii->phy_id = pl->phydev->mdio.addr;
+                       /* fall through */
 
                case SIOCGMIIREG:
                        ret = phylink_phy_read(pl, mii->phy_id, mii->reg_num);
@@ -1316,6 +1319,7 @@ int phylink_mii_ioctl(struct phylink *pl, struct ifreq *ifr, int cmd)
                switch (cmd) {
                case SIOCGMIIPHY:
                        mii->phy_id = 0;
+                       /* fall through */
 
                case SIOCGMIIREG:
                        ret = phylink_mii_read(pl, mii->phy_id, mii->reg_num);
@@ -1427,9 +1431,8 @@ static void phylink_sfp_link_down(void *upstream)
        WARN_ON(!lockdep_rtnl_is_held());
 
        set_bit(PHYLINK_DISABLE_LINK, &pl->phylink_disable_state);
+       queue_work(system_power_efficient_wq, &pl->resolve);
        flush_work(&pl->resolve);
-
-       netif_carrier_off(pl->netdev);
 }
 
 static void phylink_sfp_link_up(void *upstream)
index 8a1b1f4c1b7c675278902ec78c31a632df44351f..ab64a142b832c20328f73c691fb566b78c2c8399 100644 (file)
@@ -356,7 +356,8 @@ EXPORT_SYMBOL_GPL(sfp_register_upstream);
 void sfp_unregister_upstream(struct sfp_bus *bus)
 {
        rtnl_lock();
-       sfp_unregister_bus(bus);
+       if (bus->sfp)
+               sfp_unregister_bus(bus);
        bus->upstream = NULL;
        bus->netdev = NULL;
        rtnl_unlock();
@@ -459,7 +460,8 @@ EXPORT_SYMBOL_GPL(sfp_register_socket);
 void sfp_unregister_socket(struct sfp_bus *bus)
 {
        rtnl_lock();
-       sfp_unregister_bus(bus);
+       if (bus->netdev)
+               sfp_unregister_bus(bus);
        bus->sfp_dev = NULL;
        bus->sfp = NULL;
        bus->socket_ops = NULL;
index 3000ddd1c7e2e481bb961deb86099b5c2ea11371..728819feab44db75f6b4c369767c3ed6c1966cbf 100644 (file)
@@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
        {QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
        {QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+       {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
        {QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
        {QMI_FIXED_INTF(0x12d1, 0x140c, 1)},    /* Huawei E173 */
        {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},    /* Huawei E1820 */
index 19b9cc51079e75346af766c91786d66eaa92c3f2..31f4b7911ef84c85789011332e37c5314099d82c 100644 (file)
@@ -2155,6 +2155,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                ndst = &rt->dst;
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
                err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@ -2190,6 +2197,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                goto out_unlock;
                }
 
+               if (skb_dst(skb)) {
+                       int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
+
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
+                                                      skb, mtu);
+               }
+
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                skb_scrub_packet(skb, xnet);
@@ -3103,6 +3117,11 @@ static void vxlan_config_apply(struct net_device *dev,
 
                max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
                                           VXLAN_HEADROOM);
+               if (max_mtu < ETH_MIN_MTU)
+                       max_mtu = ETH_MIN_MTU;
+
+               if (!changelink && !conf->mtu)
+                       dev->mtu = max_mtu;
        }
 
        if (dev->mtu > max_mtu)
index f7d228b5ba933f744474be119802f41e461c5715..987f1252a3cf888d76432ee4f55b1bd215358209 100644 (file)
@@ -384,6 +384,18 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed)
                }
        }
 
+       if (changed & IEEE80211_CONF_CHANGE_PS) {
+               list_for_each_entry(tmp, &wcn->vif_list, list) {
+                       vif = wcn36xx_priv_to_vif(tmp);
+                       if (hw->conf.flags & IEEE80211_CONF_PS) {
+                       if (vif->bss_conf.ps) /* ps allowed? */
+                                       wcn36xx_pmc_enter_bmps_state(wcn, vif);
+                       } else {
+                               wcn36xx_pmc_exit_bmps_state(wcn, vif);
+                       }
+               }
+       }
+
        mutex_unlock(&wcn->conf_mutex);
 
        return 0;
@@ -747,17 +759,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
                vif_priv->dtim_period = bss_conf->dtim_period;
        }
 
-       if (changed & BSS_CHANGED_PS) {
-               wcn36xx_dbg(WCN36XX_DBG_MAC,
-                           "mac bss PS set %d\n",
-                           bss_conf->ps);
-               if (bss_conf->ps) {
-                       wcn36xx_pmc_enter_bmps_state(wcn, vif);
-               } else {
-                       wcn36xx_pmc_exit_bmps_state(wcn, vif);
-               }
-       }
-
        if (changed & BSS_CHANGED_BSSID) {
                wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
                            bss_conf->bssid);
index 589fe5f7097160a4bcee5d1b1be979ce389e297c..1976b80c235fe5e4c5c591a761c1dc8d1d7b941b 100644 (file)
@@ -45,8 +45,10 @@ int wcn36xx_pmc_exit_bmps_state(struct wcn36xx *wcn,
        struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
 
        if (WCN36XX_BMPS != vif_priv->pw_state) {
-               wcn36xx_err("Not in BMPS mode, no need to exit from BMPS mode!\n");
-               return -EINVAL;
+               /* Unbalanced call or last BMPS enter failed */
+               wcn36xx_dbg(WCN36XX_DBG_PMC,
+                           "Not in BMPS mode, no need to exit\n");
+               return -EALREADY;
        }
        wcn36xx_smd_exit_bmps(wcn, vif);
        vif_priv->pw_state = WCN36XX_FULL_POWER;
index d749abeca3ae99866c33ffce095d09cfce8c8ff0..403e65c309d0ac8e22fa84f163c636b18bc5872c 100644 (file)
@@ -670,11 +670,15 @@ static inline u8 iwl_pcie_get_cmd_index(struct iwl_txq *q, u32 index)
        return index & (q->n_window - 1);
 }
 
-static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
+static inline void *iwl_pcie_get_tfd(struct iwl_trans *trans,
                                     struct iwl_txq *txq, int idx)
 {
-       return txq->tfds + trans_pcie->tfd_size * iwl_pcie_get_cmd_index(txq,
-                                                                        idx);
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+       if (trans->cfg->use_tfh)
+               idx = iwl_pcie_get_cmd_index(txq, idx);
+
+       return txq->tfds + trans_pcie->tfd_size * idx;
 }
 
 static inline void iwl_enable_rfkill_int(struct iwl_trans *trans)
index 16b345f54ff0003db66f70f547338558bc7c4703..6d0a907d5ba58f8666d06e65744d1c179edf3013 100644 (file)
@@ -171,8 +171,6 @@ static void iwl_pcie_gen2_tfd_unmap(struct iwl_trans *trans,
 
 static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 {
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
        /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
         * idx is bounded by n_window
         */
@@ -181,7 +179,7 @@ static void iwl_pcie_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
        lockdep_assert_held(&txq->lock);
 
        iwl_pcie_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
-                               iwl_pcie_get_tfd(trans_pcie, txq, idx));
+                               iwl_pcie_get_tfd(trans, txq, idx));
 
        /* free SKB */
        if (txq->entries) {
@@ -364,11 +362,9 @@ struct iwl_tfh_tfd *iwl_pcie_gen2_build_tfd(struct iwl_trans *trans,
                                            struct sk_buff *skb,
                                            struct iwl_cmd_meta *out_meta)
 {
-       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
        int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
-       struct iwl_tfh_tfd *tfd =
-               iwl_pcie_get_tfd(trans_pcie, txq, idx);
+       struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, idx);
        dma_addr_t tb_phys;
        bool amsdu;
        int i, len, tb1_len, tb2_len, hdr_len;
@@ -565,8 +561,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
        u8 group_id = iwl_cmd_groupid(cmd->id);
        const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
        u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
-       struct iwl_tfh_tfd *tfd =
-               iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+       struct iwl_tfh_tfd *tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
 
        memset(tfd, 0, sizeof(*tfd));
 
index fed6d842a5e1dc444fe58ad203cb8fb5e3ab1c6f..3f85713c41dcc9291130f25873ef97bd702335f1 100644 (file)
@@ -373,7 +373,7 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 {
        struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int i, num_tbs;
-       void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
+       void *tfd = iwl_pcie_get_tfd(trans, txq, index);
 
        /* Sanity check on number of chunks */
        num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
@@ -2018,7 +2018,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
        }
 
        trace_iwlwifi_dev_tx(trans->dev, skb,
-                            iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+                            iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
                             trans_pcie->tfd_size,
                             &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
                             hdr_len);
@@ -2092,7 +2092,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
                IEEE80211_CCMP_HDR_LEN : 0;
 
        trace_iwlwifi_dev_tx(trans->dev, skb,
-                            iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+                            iwl_pcie_get_tfd(trans, txq, txq->write_ptr),
                             trans_pcie->tfd_size,
                             &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
 
@@ -2425,7 +2425,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
        memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
               IWL_FIRST_TB_SIZE);
 
-       tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
+       tfd = iwl_pcie_get_tfd(trans, txq, txq->write_ptr);
        /* Set up entry for this TFD in Tx byte-count array */
        iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
                                         iwl_pcie_tfd_get_num_tbs(trans, tfd));
index 10b075a46b266218c53d1e5674c1789e1e0f3d80..e8189c07b41f6b450f135ef703ec4e01568e311d 100644 (file)
@@ -684,6 +684,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
        hdr = skb_put(skb, sizeof(*hdr) - ETH_ALEN);
        hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA |
                                         IEEE80211_STYPE_NULLFUNC |
+                                        IEEE80211_FCTL_TODS |
                                         (ps ? IEEE80211_FCTL_PM : 0));
        hdr->duration_id = cpu_to_le16(0);
        memcpy(hdr->addr1, vp->bssid, ETH_ALEN);
@@ -3215,7 +3216,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info)
                if (!net_eq(wiphy_net(data->hw->wiphy), genl_info_net(info)))
                        continue;
 
-               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+               skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
                if (!skb) {
                        res = -ENOMEM;
                        goto out_err;
index c5a34671abdaf78a1257b869af817ecd148a6e0c..9bd7ddeeb6a5c7b29569e51ad2f469d9fe493a76 100644 (file)
@@ -1326,6 +1326,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
 
        netif_carrier_off(netdev);
 
+       xenbus_switch_state(dev, XenbusStateInitialising);
        return netdev;
 
  exit:
index e949e3302af4743472ac26c68e9f4d54a27bb11a..c586bcdb5190b1c9f6447cb9a380568a03840988 100644 (file)
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
        return ret;
 }
 
-static int btt_log_read_pair(struct arena_info *arena, u32 lane,
-                       struct log_entry *ent)
+static int btt_log_group_read(struct arena_info *arena, u32 lane,
+                       struct log_group *log)
 {
        return arena_read_bytes(arena,
-                       arena->logoff + (2 * lane * LOG_ENT_SIZE), ent,
-                       2 * LOG_ENT_SIZE, 0);
+                       arena->logoff + (lane * LOG_GRP_SIZE), log,
+                       LOG_GRP_SIZE, 0);
 }
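
Nothing moves on the media here: the old 32-byte log_entry (16 bytes of fields plus 16 of padding) also came to 2 * 32 = 64 bytes per lane, matching the new LOG_GRP_SIZE of 4 * 16 = 64. Lane 3's group therefore still starts at logoff + 192; what changes is only which 16-byte slots inside the group are treated as live. As a one-liner:

	static u64 log_group_off(u64 logoff, u32 lane)
	{
		return logoff + (u64)lane * LOG_GRP_SIZE;	/* 64 B/lane */
	}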
 
 static struct dentry *debugfs_root;
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
        debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
        debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
        debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
+       debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
+       debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
 }
 
 static void btt_debugfs_init(struct btt *btt)
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt)
        }
 }
 
+static u32 log_seq(struct log_group *log, int log_idx)
+{
+       return le32_to_cpu(log->ent[log_idx].seq);
+}
+
 /*
  * This function accepts two log entries, and uses the
  * sequence number to find the 'older' entry.
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt)
  *
  * TODO The logic feels a bit kludge-y. make it better..
  */
-static int btt_log_get_old(struct log_entry *ent)
+static int btt_log_get_old(struct arena_info *a, struct log_group *log)
 {
+       int idx0 = a->log_index[0];
+       int idx1 = a->log_index[1];
        int old;
 
        /*
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent)
         * the next time, the following logic works out to put this
         * (next) entry into [1]
         */
-       if (ent[0].seq == 0) {
-               ent[0].seq = cpu_to_le32(1);
+       if (log_seq(log, idx0) == 0) {
+               log->ent[idx0].seq = cpu_to_le32(1);
                return 0;
        }
 
-       if (ent[0].seq == ent[1].seq)
+       if (log_seq(log, idx0) == log_seq(log, idx1))
                return -EINVAL;
-       if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5)
+       if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
                return -EINVAL;
 
-       if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) {
-               if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1)
+       if (log_seq(log, idx0) < log_seq(log, idx1)) {
+               if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
                        old = 0;
                else
                        old = 1;
        } else {
-               if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1)
+               if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
                        old = 1;
                else
                        old = 0;
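
The sequence numbers compared above cycle 1 -> 2 -> 3 -> 1, so "older" means one step behind on the cycle. A standalone model of the comparison, with a worked pair: old_of(3, 1) returns 0 because after a wrap the seq-1 entry is the newer one.

	static int old_of(u32 seq0, u32 seq1)
	{
		if (seq0 < seq1)
			return (seq1 - seq0 == 1) ? 0 : 1;	/* (1,2) -> 0 */
		return (seq0 - seq1 == 1) ? 1 : 0;		/* (3,1) -> 0 */
	}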
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
 {
        int ret;
        int old_ent, ret_ent;
-       struct log_entry log[2];
+       struct log_group log;
 
-       ret = btt_log_read_pair(arena, lane, log);
+       ret = btt_log_group_read(arena, lane, &log);
        if (ret)
                return -EIO;
 
-       old_ent = btt_log_get_old(log);
+       old_ent = btt_log_get_old(arena, &log);
        if (old_ent < 0 || old_ent > 1) {
                dev_err(to_dev(arena),
                                "log corruption (%d): lane %d seq [%d, %d]\n",
-                       old_ent, lane, log[0].seq, log[1].seq);
+                               old_ent, lane, log.ent[arena->log_index[0]].seq,
+                               log.ent[arena->log_index[1]].seq);
                /* TODO set error state? */
                return -EIO;
        }
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
        ret_ent = (old_flag ? old_ent : (1 - old_ent));
 
        if (ent != NULL)
-               memcpy(ent, &log[ret_ent], LOG_ENT_SIZE);
+               memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
 
        return ret_ent;
 }
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane,
                        u32 sub, struct log_entry *ent, unsigned long flags)
 {
        int ret;
-       /*
-        * Ignore the padding in log_entry for calculating log_half.
-        * The entry is 'committed' when we write the sequence number,
-        * and we want to ensure that that is the last thing written.
-        * We don't bother writing the padding as that would be extra
-        * media wear and write amplification
-        */
-       unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2;
-       u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE);
+       u32 group_slot = arena->log_index[sub];
+       unsigned int log_half = LOG_ENT_SIZE / 2;
        void *src = ent;
+       u64 ns_off;
 
+       ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
+               (group_slot * LOG_ENT_SIZE);
        /* split the 16B write into atomic, durable halves */
        ret = arena_write_bytes(arena, ns_off, src, log_half, flags);
        if (ret)
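
This split is the commit protocol: the first 8-byte half carries lba/old_map, the second carries new_map/seq, and because seq lands last a torn update can never expose a new sequence number with stale payload. The routine plausibly continues along these lines (a hedged sketch, not the verbatim remainder of the patch):

	/* second half: new_map and seq -- the entry only becomes
	 * valid once seq reaches the media */
	ret = arena_write_bytes(arena, ns_off + log_half,
				src + log_half, log_half, flags);
	return ret;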
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena)
 {
        size_t logsize = arena->info2off - arena->logoff;
        size_t chunk_size = SZ_4K, offset = 0;
-       struct log_entry log;
+       struct log_entry ent;
        void *zerobuf;
        int ret;
        u32 i;
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena)
        }
 
        for (i = 0; i < arena->nfree; i++) {
-               log.lba = cpu_to_le32(i);
-               log.old_map = cpu_to_le32(arena->external_nlba + i);
-               log.new_map = cpu_to_le32(arena->external_nlba + i);
-               log.seq = cpu_to_le32(LOG_SEQ_INIT);
-               ret = __btt_log_write(arena, i, 0, &log, 0);
+               ent.lba = cpu_to_le32(i);
+               ent.old_map = cpu_to_le32(arena->external_nlba + i);
+               ent.new_map = cpu_to_le32(arena->external_nlba + i);
+               ent.seq = cpu_to_le32(LOG_SEQ_INIT);
+               ret = __btt_log_write(arena, i, 0, &ent, 0);
                if (ret)
                        goto free;
        }
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena)
        return 0;
 }
 
+static bool ent_is_padding(struct log_entry *ent)
+{
+       return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
+               && (ent->seq == 0);
+}
+
+/*
+ * Detecting valid log indices: We read a log group (see the comments in btt.h
+ * for a description of a 'log_group' and its 'slots'), and iterate over its
+ * four slots. We expect that a padding slot will be all-zeroes, and use this
+ * to detect a padding slot vs. an actual entry.
+ *
+ * If a log_group is in the initial state, i.e. hasn't been used since the
+ * creation of this BTT layout, it will have three of its four slots zeroed.
+ * We skip over these log_groups for the detection of log_index. If
+ * all log_groups are in the initial state (i.e. the BTT has never been
+ * written to), it is safe to assume the 'new format' of log entries in slots
+ * (0, 1).
+ */
+static int log_set_indices(struct arena_info *arena)
+{
+       bool idx_set = false, initial_state = true;
+       int ret, log_index[2] = {-1, -1};
+       u32 i, j, next_idx = 0;
+       struct log_group log;
+       u32 pad_count = 0;
+
+       for (i = 0; i < arena->nfree; i++) {
+               ret = btt_log_group_read(arena, i, &log);
+               if (ret < 0)
+                       return ret;
+
+               for (j = 0; j < 4; j++) {
+                       if (!idx_set) {
+                               if (ent_is_padding(&log.ent[j])) {
+                                       pad_count++;
+                                       continue;
+                               } else {
+                                       /* Skip if index has been recorded */
+                                       if ((next_idx == 1) &&
+                                               (j == log_index[0]))
+                                               continue;
+                                       /* valid entry, record index */
+                                       log_index[next_idx] = j;
+                                       next_idx++;
+                               }
+                               if (next_idx == 2) {
+                                       /* two valid entries found */
+                                       idx_set = true;
+                               } else if (next_idx > 2) {
+                                       /* too many valid indices */
+                                       return -ENXIO;
+                               }
+                       } else {
+                               /*
+                                * once the indices have been set, just verify
+                                * that all subsequent log groups are either in
+                                * their initial state or follow the same
+                                * indices.
+                                */
+                               if (j == log_index[0]) {
+                                       /* entry must be 'valid' */
+                                       if (ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               } else if (j == log_index[1]) {
+                                       /*
+                                        * log_index[1] can be padding if the
+                                        * lane never got used and it is still
+                                        * in the initial state (three 'padding'
+                                        * entries)
+                                        */
+                               } else {
+                                       /* entry must be invalid (padding) */
+                                       if (!ent_is_padding(&log.ent[j]))
+                                               return -ENXIO;
+                               }
+                       }
+               }
+               /*
+                * If any of the log_groups have more than one valid,
+                * non-padding entry, then we are no longer in the
+                * initial_state
+                */
+               if (pad_count < 3)
+                       initial_state = false;
+               pad_count = 0;
+       }
+
+       if (!initial_state && !idx_set)
+               return -ENXIO;
+
+       /*
+        * If all the entries in the log were in the initial state,
+        * assume new padding scheme
+        */
+       if (initial_state)
+               log_index[1] = 1;
+
+       /*
+        * Only allow the known permutations of log/padding indices,
+        * i.e. (0, 1) and (0, 2)
+        */
+       if (log_index[0] != 0 ||
+           (log_index[1] != 1 &&
+            log_index[1] != 2)) {
+               dev_err(to_dev(arena), "Found an unknown padding scheme\n");
+               return -ENXIO;
+       }
+
+       arena->log_index[0] = log_index[0];
+       arena->log_index[1] = log_index[1];
+       dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
+       dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
+       return 0;
+}
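
The only two outcomes the detection above accepts, summarized as a self-check (a hypothetical helper, not part of the patch):

	static bool log_indices_valid(const int idx[2])
	{
		/* old layout: entries at (0, 2); new layout: (0, 1) */
		return idx[0] == 0 && (idx[1] == 1 || idx[1] == 2);
	}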
+
 static int btt_rtt_init(struct arena_info *arena)
 {
        arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        available -= 2 * BTT_PG_SIZE;
 
        /* The log takes a fixed amount of space based on nfree */
-       logsize = roundup(2 * arena->nfree * sizeof(struct log_entry),
-                               BTT_PG_SIZE);
+       logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
        available -= logsize;
 
        /* Calculate optimal split between map and data area */
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
        arena->mapoff = arena->dataoff + datasize;
        arena->logoff = arena->mapoff + mapsize;
        arena->info2off = arena->logoff + logsize;
+
+       /* Default log indices are (0,1) */
+       arena->log_index[0] = 0;
+       arena->log_index[1] = 1;
        return arena;
 }
 
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt)
                arena->external_lba_start = cur_nlba;
                parse_arena_meta(arena, super, cur_off);
 
+               ret = log_set_indices(arena);
+               if (ret) {
+                       dev_err(to_dev(arena),
+                               "Unable to deduce log/padding indices\n");
+                       goto out;
+               }
+
                mutex_init(&arena->err_lock);
                ret = btt_freelist_init(arena);
                if (ret)
index 578c2057524d396fbf7c2eee88804b58c1f17cfe..db3cb6d4d0d495df8978494ff6619e2923478d32 100644 (file)
@@ -27,6 +27,7 @@
 #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT)
 #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT)))
 #define MAP_ENT_NORMAL 0xC0000000
+#define LOG_GRP_SIZE sizeof(struct log_group)
 #define LOG_ENT_SIZE sizeof(struct log_entry)
 #define ARENA_MIN_SIZE (1UL << 24)     /* 16 MB */
 #define ARENA_MAX_SIZE (1ULL << 39)    /* 512 GB */
@@ -50,12 +51,52 @@ enum btt_init_state {
        INIT_READY
 };
 
+/*
+ * A log group represents one log 'lane', and consists of four log entries.
+ * Two of the four entries are valid entries, and the remaining two are
+ * padding. Due to an old bug in the padding location, we need to perform a
+ * test to determine the padding scheme being used, and use that scheme
+ * thereafter.
+ *
+ * In kernels prior to 4.15, 'log group' would have actual log entries at
+ * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
+ * format has log entries at indices (0, 1) and padding at indices (2, 3).
+ *
+ * Old (pre 4.15) format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq |       pad       |
+ * +-----------------+-----------------+
+ *
+ * New format:
+ * +-----------------+-----------------+
+ * |      ent[0]     |      ent[1]     |
+ * |       16B       |       16B       |
+ * | lba/old/new/seq | lba/old/new/seq |
+ * +-----------------------------------+
+ * |      ent[2]     |      ent[3]     |
+ * |       16B       |       16B       |
+ * |       pad       |       pad       |
+ * +-----------------+-----------------+
+ *
+ * We detect during start-up which format is in use, and set
+ * arena->log_index[(0, 1)] with the detected format.
+ */
+
 struct log_entry {
        __le32 lba;
        __le32 old_map;
        __le32 new_map;
        __le32 seq;
-       __le64 padding[2];
+};
+
+struct log_group {
+       struct log_entry ent[4];
 };
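
Given the diagrams above, each entry is 16 bytes and a group is 64. A compile-time assertion of those sizes (hypothetical, not in the patch) would look like:

	static inline void btt_log_layout_checks(void)
	{
		BUILD_BUG_ON(sizeof(struct log_entry) != 16);
		BUILD_BUG_ON(sizeof(struct log_group) != 64);
	}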
 
 struct btt_sb {
@@ -125,6 +166,8 @@ struct aligned_lock {
  * @list:              List head for list of arenas
  * @debugfs_dir:       Debugfs dentry
  * @flags:             Arena flags - may signify error states.
+ * @err_lock:          Mutex for synchronizing error clearing.
+ * @log_index:         Indices of the valid log entries in a log_group
  *
  * arena_info is a per-arena handle. Once an arena is narrowed down for an
  * IO, this struct is passed around for the duration of the IO.
@@ -157,6 +200,7 @@ struct arena_info {
        /* Arena flags */
        u32 flags;
        struct mutex err_lock;
+       int log_index[2];
 };
 
 /**
@@ -176,6 +220,7 @@ struct arena_info {
  * @init_lock:         Mutex used for the BTT initialization
  * @init_state:                Flag describing the initialization state for the BTT
  * @num_arenas:                Number of arenas in the BTT instance
+ * @phys_bb:           Pointer to the namespace's badblocks structure
  */
 struct btt {
        struct gendisk *btt_disk;
index 65cc171c721de8774baf05f9650f3bbacf511eec..2adada1a58551776186d6f6928a437d462734a48 100644 (file)
@@ -364,9 +364,9 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 {
        u64 checksum, offset;
-       unsigned long align;
        enum nd_pfn_mode mode;
        struct nd_namespace_io *nsio;
+       unsigned long align, start_pad;
        struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
        struct nd_namespace_common *ndns = nd_pfn->ndns;
        const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
@@ -410,6 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 
        align = le32_to_cpu(pfn_sb->align);
        offset = le64_to_cpu(pfn_sb->dataoff);
+       start_pad = le32_to_cpu(pfn_sb->start_pad);
        if (align == 0)
                align = 1UL << ilog2(offset);
        mode = le32_to_cpu(pfn_sb->mode);
@@ -468,7 +469,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
                return -EBUSY;
        }
 
-       if ((align && !IS_ALIGNED(offset, align))
+       if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
                        || !IS_ALIGNED(offset, PAGE_SIZE)) {
                dev_err(&nd_pfn->dev,
                                "bad offset: %#llx dax disabled align: %#lx\n",
@@ -582,6 +583,12 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
        return altmap;
 }
 
+static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
+{
+       return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
+                       ALIGN_DOWN(phys, nd_pfn->align));
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
        u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
@@ -637,13 +644,16 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
        start = nsio->res.start;
        size = PHYS_SECTION_ALIGN_UP(start + size) - start;
        if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-                               IORES_DESC_NONE) == REGION_MIXED) {
+                               IORES_DESC_NONE) == REGION_MIXED
+                       || !IS_ALIGNED(start + resource_size(&nsio->res),
+                               nd_pfn->align)) {
                size = resource_size(&nsio->res);
-               end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+               end_trunc = start + size - phys_pmem_align_down(nd_pfn,
+                               start + size);
        }
 
        if (start_pad + end_trunc)
-               dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+               dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
                                dev_name(&ndns->dev), start_pad + end_trunc);
 
        /*
index f837d666cbd499c8e33a1514f55344a1796005a1..839650e0926af1aaaf21bafbdc1baa79cf907d76 100644 (file)
@@ -1287,7 +1287,7 @@ static void nvme_config_discard(struct nvme_ctrl *ctrl,
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       queue->limits.discard_alignment = size;
+       queue->limits.discard_alignment = 0;
        queue->limits.discard_granularity = size;
 
        blk_queue_max_discard_sectors(queue, UINT_MAX);
@@ -1335,6 +1335,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
                struct nvme_ns *ns, struct nvme_id_ns *id)
 {
        sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+       unsigned short bs = 1 << ns->lba_shift;
        unsigned stream_alignment = 0;
 
        if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
@@ -1343,7 +1344,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
        blk_mq_freeze_queue(disk->queue);
        blk_integrity_unregister(disk);
 
-       blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+       blk_queue_logical_block_size(disk->queue, bs);
+       blk_queue_physical_block_size(disk->queue, bs);
+       blk_queue_io_min(disk->queue, bs);
+
        if (ns->ms && !ns->ext &&
            (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
                nvme_init_integrity(disk, ns->ms, ns->pi_type);
@@ -1705,7 +1709,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
-       if (ctrl->quirks & NVME_QUIRK_STRIPE_SIZE)
+       if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+           is_power_of_2(ctrl->max_hw_sectors))
                blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
        blk_queue_virt_boundary(q, ctrl->page_size - 1);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
@@ -2869,7 +2874,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        nvme_set_queue_limits(ctrl, ns->queue);
-       nvme_setup_streams_ns(ctrl, ns);
 
        id = nvme_identify_ns(ctrl, nsid);
        if (!id)
@@ -2880,6 +2884,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
        if (nvme_init_ns_head(ns, nsid, id, &new))
                goto out_free_id;
+       nvme_setup_streams_ns(ctrl, ns);
        
 #ifdef CONFIG_NVME_MULTIPATH
        /*
@@ -2965,8 +2970,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                return;
 
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
-               if (blk_get_integrity(ns->disk))
-                       blk_integrity_unregister(ns->disk);
                nvme_mpath_remove_disk_links(ns);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_id_attr_group);
@@ -2974,6 +2977,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                        nvme_nvm_unregister_sysfs(ns);
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
+               if (blk_get_integrity(ns->disk))
+                       blk_integrity_unregister(ns->disk);
        }
 
        mutex_lock(&ns->ctrl->subsys->lock);
@@ -2986,6 +2991,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        mutex_unlock(&ns->ctrl->namespaces_mutex);
 
        synchronize_srcu(&ns->head->srcu);
+       nvme_mpath_check_last_path(ns);
        nvme_put_ns(ns);
 }
 
index 76b4fe6816a03533dbb6bbfbf30016110a26c648..894c2ccb3891e0b83e1c839f0b08f4cba5d179ae 100644 (file)
@@ -74,6 +74,7 @@ static struct nvmf_host *nvmf_host_default(void)
                return NULL;
 
        kref_init(&host->ref);
+       uuid_gen(&host->id);
        snprintf(host->nqn, NVMF_NQN_SIZE,
                "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);
 
index 0a8af4daef8903f8ba983d345f1044498c57a975..794e66e4aa20115f4dc3a6b5fc12f706b2040bf4 100644 (file)
@@ -3221,7 +3221,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
                /* initiate nvme ctrl ref counting teardown */
                nvme_uninit_ctrl(&ctrl->ctrl);
-               nvme_put_ctrl(&ctrl->ctrl);
 
                /* Remove core ctrl ref. */
                nvme_put_ctrl(&ctrl->ctrl);
index ea1aa5283e8ed9215537594a33a243d47b363e25..a00eabd0642738bbdbaf0b11d7f8e1747c996e3c 100644 (file)
@@ -417,6 +417,15 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
                rcu_assign_pointer(head->current_path, NULL);
 }
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
+
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+       struct nvme_ns_head *head = ns->head;
+
+       if (head->disk && list_empty(&head->list))
+               kblockd_schedule_work(&head->requeue_work);
+}
+
 #else
 static inline void nvme_failover_req(struct request *req)
 {
@@ -448,6 +457,9 @@ static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 }
+static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_NVM
index f5800c3c9082a6f038c129327bccd22f5eb861fe..d53550e612bc1350febf84c891e2a28c24a6ae34 100644 (file)
@@ -448,12 +448,31 @@ static void **nvme_pci_iod_list(struct request *req)
        return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
+static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
+{
+       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+       unsigned int avg_seg_size;
+
+       avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
+                       blk_rq_nr_phys_segments(req));
+
+       if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
+               return false;
+       if (!iod->nvmeq->qid)
+               return false;
+       if (!sgl_threshold || avg_seg_size < sgl_threshold)
+               return false;
+       return true;
+}
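
A worked instance of the heuristic above, assuming the driver's default sgl_threshold of 32 KiB: a 128 KiB request split into 32 segments averages 4 KiB per segment and stays on the PRP path, while the same payload in 4 segments averages 32 KiB and switches to SGLs.

	static bool would_use_sgls(unsigned int payload, unsigned int nsegs,
				   unsigned int threshold)
	{
		unsigned int avg_seg_size = DIV_ROUND_UP(payload, nsegs);

		return threshold && avg_seg_size >= threshold;
	}
	/* would_use_sgls(SZ_128K, 32, SZ_32K) -> false (PRPs)
	 * would_use_sgls(SZ_128K,  4, SZ_32K) -> true  (SGLs) */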
+
 static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
        int nseg = blk_rq_nr_phys_segments(rq);
        unsigned int size = blk_rq_payload_bytes(rq);
 
+       iod->use_sgl = nvme_pci_use_sgls(dev, rq);
+
        if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
                size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
                                iod->use_sgl);
@@ -604,8 +623,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
        dma_addr_t prp_dma;
        int nprps, i;
 
-       iod->use_sgl = false;
-
        length -= (page_size - offset);
        if (length <= 0) {
                iod->first_dma = 0;
@@ -715,8 +732,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
        int entries = iod->nents, i = 0;
        dma_addr_t sgl_dma;
 
-       iod->use_sgl = true;
-
        /* setting the transfer type as SGL */
        cmd->flags = NVME_CMD_SGL_METABUF;
 
@@ -770,23 +785,6 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
        return BLK_STS_OK;
 }
 
-static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
-{
-       struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-       unsigned int avg_seg_size;
-
-       avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req),
-                       blk_rq_nr_phys_segments(req));
-
-       if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
-               return false;
-       if (!iod->nvmeq->qid)
-               return false;
-       if (!sgl_threshold || avg_seg_size < sgl_threshold)
-               return false;
-       return true;
-}
-
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                struct nvme_command *cmnd)
 {
@@ -806,7 +804,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                                DMA_ATTR_NO_WARN))
                goto out;
 
-       if (nvme_pci_use_sgls(dev, req))
+       if (iod->use_sgl)
                ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
        else
                ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
index 37af56596be6ce8a0339ce2a3151f8dd98f8f854..2a0bba7f50cf43bb76e9d1f3073e24ba1edd9e1c 100644 (file)
@@ -974,12 +974,18 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
        nvme_start_queues(&ctrl->ctrl);
 
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+               /* state change failure should never happen */
+               WARN_ON_ONCE(1);
+               return;
+       }
+
        nvme_rdma_reconnect_or_remove(ctrl);
 }
 
 static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
 {
-       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING))
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
                return;
 
        queue_work(nvme_wq, &ctrl->err_work);
@@ -1753,6 +1759,12 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
        nvme_stop_ctrl(&ctrl->ctrl);
        nvme_rdma_shutdown_ctrl(ctrl, false);
 
+       if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
+               /* state change failure should never happen */
+               WARN_ON_ONCE(1);
+               return;
+       }
+
        ret = nvme_rdma_configure_admin_queue(ctrl, false);
        if (ret)
                goto out_fail;
index 7b75d9de55ab0d33939bf314a81ee01e26a6d1b1..6a018a0bd6ce851306dd82e5c21e680c626f99d5 100644 (file)
@@ -1085,7 +1085,7 @@ fcloop_delete_target_port(struct device *dev, struct device_attribute *attr,
                const char *buf, size_t count)
 {
        struct fcloop_nport *nport = NULL, *tmpport;
-       struct fcloop_tport *tport;
+       struct fcloop_tport *tport = NULL;
        u64 nodename, portname;
        unsigned long flags;
        int ret;
index a346b4923550829eca8c7a8176c4ad39dbd54f7c..41d3a3c1104ec7f810c7f9c33a2cab414c475299 100644 (file)
@@ -156,8 +156,8 @@ static int meson_mx_efuse_read(void *context, unsigned int offset,
                                 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE,
                                 MESON_MX_EFUSE_CNTL1_AUTO_RD_ENABLE);
 
-       for (i = offset; i < offset + bytes; i += efuse->config.word_size) {
-               addr = i / efuse->config.word_size;
+       for (i = 0; i < bytes; i += efuse->config.word_size) {
+               addr = (offset + i) / efuse->config.word_size;
 
                err = meson_mx_efuse_read_addr(efuse, addr, &tmp);
                if (err)
index 3481e69738b5f94d18cc393f77934fac7cfc7fdc..a327be1d264b8cea389feeb91423bb24a6a0d688 100644 (file)
@@ -231,7 +231,12 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
                        rc = of_mdiobus_register_phy(mdio, child, addr);
                else
                        rc = of_mdiobus_register_device(mdio, child, addr);
-               if (rc)
+
+               if (rc == -ENODEV)
+                       dev_err(&mdio->dev,
+                               "MDIO device at address %d is missing.\n",
+                               addr);
+               else if (rc)
                        goto unregister;
        }
 
@@ -255,7 +260,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 
                        if (of_mdiobus_child_is_phy(child)) {
                                rc = of_mdiobus_register_phy(mdio, child, addr);
-                               if (rc)
+                               if (rc && rc != -ENODEV)
                                        goto unregister;
                        }
                }
index 0b3fb99d9b8992531412fae57d809984cdf853ab..7390fb8ca9d156c485f85aa1c0cc475988765b23 100644 (file)
@@ -303,7 +303,7 @@ static void dino_mask_irq(struct irq_data *d)
        struct dino_device *dino_dev = irq_data_get_irq_chip_data(d);
        int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
 
-       DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+       DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
        /* Clear the matching bit in the IMR register */
        dino_dev->imr &= ~(DINO_MASK_IRQ(local_irq));
@@ -316,7 +316,7 @@ static void dino_unmask_irq(struct irq_data *d)
        int local_irq = gsc_find_local_irq(d->irq, dino_dev->global_irq, DINO_LOCAL_IRQS);
        u32 tmp;
 
-       DBG(KERN_WARNING "%s(0x%p, %d)\n", __func__, dino_dev, d->irq);
+       DBG(KERN_WARNING "%s(0x%px, %d)\n", __func__, dino_dev, d->irq);
 
        /*
        ** clear pending IRQ bits
@@ -396,7 +396,7 @@ ilr_again:
        if (mask) {
                if (--ilr_loop > 0)
                        goto ilr_again;
-               printk(KERN_ERR "Dino 0x%p: stuck interrupt %d\n", 
+               printk(KERN_ERR "Dino 0x%px: stuck interrupt %d\n",
                       dino_dev->hba.base_addr, mask);
                return IRQ_NONE;
        }
@@ -553,7 +553,7 @@ dino_fixup_bus(struct pci_bus *bus)
         struct pci_dev *dev;
         struct dino_device *dino_dev = DINO_DEV(parisc_walk_tree(bus->bridge));
 
-       DBG(KERN_WARNING "%s(0x%p) bus %d platform_data 0x%p\n",
+       DBG(KERN_WARNING "%s(0x%px) bus %d platform_data 0x%px\n",
            __func__, bus, bus->busn_res.start,
            bus->bridge->platform_data);
 
@@ -854,7 +854,7 @@ static int __init dino_common_init(struct parisc_device *dev,
        res->flags = IORESOURCE_IO; /* do not mark it busy ! */
        if (request_resource(&ioport_resource, res) < 0) {
                printk(KERN_ERR "%s: request I/O Port region failed "
-                      "0x%lx/%lx (hpa 0x%p)\n",
+                      "0x%lx/%lx (hpa 0x%px)\n",
                       name, (unsigned long)res->start, (unsigned long)res->end,
                       dino_dev->hba.base_addr);
                return 1;
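
All of the %p -> %px conversions in this file track the 4.15 printk change
that made plain %p print a hashed value to avoid leaking kernel addresses;
%px prints the raw pointer and is meant for places, like this debug code,
where the real address is the point. For illustration:

	pr_debug("dino_dev at %p (hashed) vs %px (raw)\n", dino_dev, dino_dev);
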
index 4dd9b1308128a4eb34aecd2824ce245f6a069fd1..99a80da6fd2e2c15f246076114106295c79a4d74 100644 (file)
@@ -106,7 +106,7 @@ static int __init eisa_eeprom_init(void)
                return retval;
        }
 
-       printk(KERN_INFO "EISA EEPROM at 0x%p\n", eisa_eeprom_addr);
+       printk(KERN_INFO "EISA EEPROM at 0x%px\n", eisa_eeprom_addr);
        return 0;
 }
 
index a25fed52f7e94de4bd3dd5cb8b0922e1df8e81bf..41b740aed3a346e4bbc610959281649447f83bd4 100644 (file)
@@ -1692,3 +1692,36 @@ void lba_set_iregs(struct parisc_device *lba, u32 ibase, u32 imask)
        iounmap(base_addr);
 }
 
+
+/*
+ * The design of the Diva management card in rp34x0 machines (rp3410, rp3440)
+ * seems rushed: many built-in components simply don't work. The following
+ * quirks disable the serial AUX port and the built-in ATI RV100 Radeon 7000
+ * graphics card. Neither has any external connector and both are therefore
+ * useless. Worse, the AUX port occupies ttyS0 and so makes these machines
+ * the only PARISC machines on which ttyS0 cannot be used as the boot
+ * console.
+ */
+static void quirk_diva_ati_card(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1292)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in ATI card");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RADEON_QY,
+       quirk_diva_ati_card);
+
+static void quirk_diva_aux_disable(struct pci_dev *dev)
+{
+       if (dev->subsystem_vendor != PCI_VENDOR_ID_HP ||
+           dev->subsystem_device != 0x1291)
+               return;
+
+       dev_info(&dev->dev, "Hiding Diva built-in AUX serial device");
+       dev->device = 0;
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
+       quirk_diva_aux_disable);
index 04dac6a42c9f287519379757a0cefd3c08e14e11..6b8d060d07de7d8ba2fc66dd3a0b0087470f4b82 100644 (file)
@@ -985,9 +985,7 @@ static u32 hv_compose_msi_req_v1(
        int_pkt->wslot.slot = slot;
        int_pkt->int_desc.vector = vector;
        int_pkt->int_desc.vector_count = 1;
-       int_pkt->int_desc.delivery_mode =
-               (apic->irq_delivery_mode == dest_LowestPrio) ?
-                       dest_LowestPrio : dest_Fixed;
+       int_pkt->int_desc.delivery_mode = dest_Fixed;
 
        /*
         * Create MSI w/ dummy vCPU set, overwritten by subsequent retarget in
@@ -1008,9 +1006,7 @@ static u32 hv_compose_msi_req_v2(
        int_pkt->wslot.slot = slot;
        int_pkt->int_desc.vector = vector;
        int_pkt->int_desc.vector_count = 1;
-       int_pkt->int_desc.delivery_mode =
-               (apic->irq_delivery_mode == dest_LowestPrio) ?
-                       dest_LowestPrio : dest_Fixed;
+       int_pkt->int_desc.delivery_mode = dest_Fixed;
 
        /*
         * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
index 945099d49f8f9b08c2b159d2cf4899b1fdc4cea4..14fd865a512096393149fd63a3707305648f276f 100644 (file)
@@ -1012,7 +1012,12 @@ static int pci_pm_thaw_noirq(struct device *dev)
        if (pci_has_legacy_pm_support(pci_dev))
                return pci_legacy_resume_early(dev);
 
-       pci_update_current_state(pci_dev, PCI_D0);
+       /*
+        * pci_restore_state() requires the device to be in D0 (because of MSI
+        * restoration among other things), so force it into D0 in case the
+        * driver's "freeze" callbacks put it into a low-power state directly.
+        */
+       pci_set_power_state(pci_dev, PCI_D0);
        pci_restore_state(pci_dev);
 
        if (drv && drv->pm && drv->pm->thaw_noirq)
index accaaaccb662fd24b1b3cf5f8e22154f5f46f9eb..6601ad0dfb3ad23c77865138b3db7bd0c4f55ae8 100644 (file)
@@ -310,7 +310,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev,
        int irq, error;
 
        irq = platform_get_irq_byname(pdev, name);
-       if (!irq)
+       if (irq < 0)
                return -ENODEV;
 
        error = devm_request_threaded_irq(ddata->dev, irq, NULL,
index cb09245e9b4c730528a7019b44f89c5abd20c7d9..c845facacb063e9a0bd2b244869d3c22c3409fbf 100644 (file)
@@ -12,7 +12,9 @@ config PHY_RCAR_GEN3_USB2
        tristate "Renesas R-Car generation 3 USB 2.0 PHY driver"
        depends on ARCH_RENESAS
        depends on EXTCON
+       depends on USB_SUPPORT
        select GENERIC_PHY
+       select USB_COMMON
        help
          Support for USB 2.0 PHY found on Renesas R-Car generation 3 SoCs.
 
index ee85fa0ca4b05bac340121cd798291708091e59e..7492c8978217f45e04f591898bfb0ddbaac2a217 100644 (file)
@@ -1137,6 +1137,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
                if (IS_ERR(phy)) {
                        dev_err(dev, "failed to create phy: %s\n",
                                child_np->name);
+                       pm_runtime_disable(dev);
                        return PTR_ERR(phy);
                }
 
@@ -1146,6 +1147,7 @@ static int rockchip_typec_phy_probe(struct platform_device *pdev)
        phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
        if (IS_ERR(phy_provider)) {
                dev_err(dev, "Failed to register phy provider\n");
+               pm_runtime_disable(dev);
                return PTR_ERR(phy_provider);
        }
 
index 4307bf0013e186cd859b779aaaf4c61ddd662a6c..63e916d4d0696156b244fb2c7e0e5f6de606c8e3 100644 (file)
@@ -75,14 +75,14 @@ MODULE_DEVICE_TABLE(of, tegra_xusb_padctl_of_match);
 static struct device_node *
 tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(padctl->dev->of_node);
+       struct device_node *pads, *np;
+
+       pads = of_get_child_by_name(padctl->dev->of_node, "pads");
+       if (!pads)
+               return NULL;
 
-       np = of_find_node_by_name(np, "pads");
-       if (np)
-               np = of_find_node_by_name(np, name);
+       np = of_get_child_by_name(pads, name);
+       of_node_put(pads);
 
        return np;
 }
@@ -90,16 +90,16 @@ tegra_xusb_find_pad_node(struct tegra_xusb_padctl *padctl, const char *name)
 static struct device_node *
 tegra_xusb_pad_find_phy_node(struct tegra_xusb_pad *pad, unsigned int index)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(pad->dev.of_node);
+       struct device_node *np, *lanes;
 
-       np = of_find_node_by_name(np, "lanes");
-       if (!np)
+       lanes = of_get_child_by_name(pad->dev.of_node, "lanes");
+       if (!lanes)
                return NULL;
 
-       return of_find_node_by_name(np, pad->soc->lanes[index].name);
+       np = of_get_child_by_name(lanes, pad->soc->lanes[index].name);
+       of_node_put(lanes);
+
+       return np;
 }
 
 static int
@@ -195,7 +195,7 @@ int tegra_xusb_pad_register(struct tegra_xusb_pad *pad,
        unsigned int i;
        int err;
 
-       children = of_find_node_by_name(pad->dev.of_node, "lanes");
+       children = of_get_child_by_name(pad->dev.of_node, "lanes");
        if (!children)
                return -ENODEV;
 
@@ -444,21 +444,21 @@ static struct device_node *
 tegra_xusb_find_port_node(struct tegra_xusb_padctl *padctl, const char *type,
                          unsigned int index)
 {
-       /*
-        * of_find_node_by_name() drops a reference, so make sure to grab one.
-        */
-       struct device_node *np = of_node_get(padctl->dev->of_node);
+       struct device_node *ports, *np;
+       char *name;
 
-       np = of_find_node_by_name(np, "ports");
-       if (np) {
-               char *name;
+       ports = of_get_child_by_name(padctl->dev->of_node, "ports");
+       if (!ports)
+               return NULL;
 
-               name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
-               if (!name)
-                       return ERR_PTR(-ENOMEM);
-               np = of_find_node_by_name(np, name);
-               kfree(name);
+       name = kasprintf(GFP_KERNEL, "%s-%u", type, index);
+       if (!name) {
+               of_node_put(ports);
+               return ERR_PTR(-ENOMEM);
        }
+       np = of_get_child_by_name(ports, name);
+       kfree(name);
+       of_node_put(ports);
 
        return np;
 }
@@ -847,7 +847,7 @@ static void tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
 
 static int tegra_xusb_padctl_probe(struct platform_device *pdev)
 {
-       struct device_node *np = of_node_get(pdev->dev.of_node);
+       struct device_node *np = pdev->dev.of_node;
        const struct tegra_xusb_padctl_soc *soc;
        struct tegra_xusb_padctl *padctl;
        const struct of_device_id *match;
@@ -855,7 +855,7 @@ static int tegra_xusb_padctl_probe(struct platform_device *pdev)
        int err;
 
        /* for backwards compatibility with old device trees */
-       np = of_find_node_by_name(np, "pads");
+       np = of_get_child_by_name(np, "pads");
        if (!np) {
                dev_warn(&pdev->dev, "deprecated DT, using legacy driver\n");
                return tegra_xusb_padctl_legacy_probe(pdev);
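
The substitution repeated throughout this file swaps of_find_node_by_name(),
which walks the whole tree starting after the given node and drops the
passed-in reference, for of_get_child_by_name(), which only matches direct
children and returns a new reference the caller must drop. A minimal sketch
of the lookup pattern ("usb2" is a hypothetical child-node name):

	struct device_node *pads, *np;

	pads = of_get_child_by_name(dev->of_node, "pads");
	if (!pads)
		return NULL;
	np = of_get_child_by_name(pads, "usb2");	/* hypothetical name */
	of_node_put(pads);	/* np holds its own reference */
	return np;		/* caller releases np with of_node_put() */
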
index bdedb6325c72a5fa0966377d53678722d18f07eb..4471fd94e1fe1f48b953360ad76e638e88f1a7ff 100644 (file)
@@ -1620,6 +1620,22 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
                        clear_bit(i, chip->irq.valid_mask);
        }
 
+       /*
+        * The same set of machines in chv_no_valid_mask[] has incorrectly
+        * configured GPIOs that generate spurious interrupts, so we use
+        * this same list to apply another quirk for them.
+        *
+        * See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
+        */
+       if (!need_valid_mask) {
+               /*
+                * Mask all interrupts the community is able to generate
+                * but leave the ones that can only generate GPEs unmasked.
+                */
+               chv_writel(GENMASK(31, pctrl->community->nirqs),
+                          pctrl->regs + CHV_INTMASK);
+       }
+
        /* Clear all interrupts */
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
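
GENMASK(h, l) builds a word with bits l..h set, so with nirqs = 16 the value
written is GENMASK(31, 16) = 0xffff0000: the low 16 bits, corresponding to
the lines this community can raise as interrupts, are cleared (masked),
while the GPE-only lines above keep their bits set, matching the comment
above. The exact mask/unmask polarity of CHV_INTMASK is inferred from that
comment rather than stated in this hunk.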
 
index e6cd8de793e2ae66c791389ff686b42ad8084d84..3501491e5bfc8a5547c3ac820e01ad338dbb097b 100644 (file)
@@ -222,6 +222,9 @@ static enum pin_config_param pcs_bias[] = {
  */
 static struct lock_class_key pcs_lock_class;
 
+/* Class for the IRQ request mutex */
+static struct lock_class_key pcs_request_class;
+
 /*
  * REVISIT: Reads and writes could eventually use regmap or something
  * generic. But at least on omaps, some mux registers are performance
@@ -1486,7 +1489,7 @@ static int pcs_irqdomain_map(struct irq_domain *d, unsigned int irq,
        irq_set_chip_data(irq, pcs_soc);
        irq_set_chip_and_handler(irq, &pcs->chip,
                                 handle_level_irq);
-       irq_set_lockdep_class(irq, &pcs_lock_class);
+       irq_set_lockdep_class(irq, &pcs_lock_class, &pcs_request_class);
        irq_set_noprobe(irq);
 
        return 0;
index a276c61be217b41b1ce35d33875d8a532033eff0..e62ab087bfd8afd788236d11137405d4abfca3f5 100644 (file)
@@ -290,7 +290,7 @@ static int stm32_gpio_domain_translate(struct irq_domain *d,
 }
 
 static int stm32_gpio_domain_activate(struct irq_domain *d,
-                                     struct irq_data *irq_data, bool early)
+                                     struct irq_data *irq_data, bool reserve)
 {
        struct stm32_gpio_bank *bank = d->host_data;
        struct stm32_pinctrl *pctl = dev_get_drvdata(bank->gpio_chip.parent);
index 791449a2370f4f10af698dbaf27bb3749d6d263a..daa68acbc9003b34615fc43b810c5fb77dd87ecf 100644 (file)
@@ -1458,5 +1458,5 @@ static void __exit acpi_wmi_exit(void)
        class_unregister(&wmi_bus_class);
 }
 
-subsys_initcall(acpi_wmi_init);
+subsys_initcall_sync(acpi_wmi_init);
 module_exit(acpi_wmi_exit);
index c94b606e0df8831e27a2c66a38ec19f43ad9a664..ee14d8e45c971863c6aef252ecbadba64d365826 100644 (file)
@@ -2803,6 +2803,16 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
                erp = dasd_3990_erp_handle_match_erp(cqr, erp);
        }
 
+
+       /*
+        * For path verification work we need to stick with the path that was
+        * originally chosen so that the per path configuration data is
+        * assigned correctly.
+        */
+       if (test_bit(DASD_CQR_VERIFY_PATH, &erp->flags) && cqr->lpm) {
+               erp->lpm = cqr->lpm;
+       }
+
        if (device->features & DASD_FEATURE_ERPLOG) {
                /* print current erp_chain */
                dev_err(&device->cdev->dev,
index 05ac6ba15a53285f41e01ab415b776366ddb5e54..614b44e70a2818cbec5bfc14b61c09f16d5fa145 100644 (file)
@@ -17,6 +17,8 @@ CFLAGS_REMOVE_sclp_early_core.o       += $(CC_FLAGS_MARCH)
 CFLAGS_sclp_early_core.o               += -march=z900
 endif
 
+CFLAGS_sclp_early_core.o               += -D__NO_FORTIFY
+
 obj-y += ctrlchar.o keyboard.o defkeymap.o sclp.o sclp_rw.o sclp_quiesce.o \
         sclp_cmd.o sclp_config.o sclp_cpi_sys.o sclp_ocf.o sclp_ctl.o \
         sclp_early.o sclp_early_core.o
index 6c815207f4f50470c1a0fea8992183aa67f69e37..3614df68830f8f6a4abd756c52ca4e1e72e8e1d1 100644 (file)
@@ -5386,6 +5386,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(qeth_poll);
 
+static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
+{
+       if (!cmd->hdr.return_code)
+               cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
+       return cmd->hdr.return_code;
+}
+
 int qeth_setassparms_cb(struct qeth_card *card,
                        struct qeth_reply *reply, unsigned long data)
 {
@@ -6242,7 +6249,7 @@ static int qeth_ipa_checksum_run_cmd_cb(struct qeth_card *card,
                                (struct qeth_checksum_cmd *)reply->param;
 
        QETH_CARD_TEXT(card, 4, "chkdoccb");
-       if (cmd->hdr.return_code)
+       if (qeth_setassparms_inspect_rc(cmd))
                return 0;
 
        memset(chksum_cb, 0, sizeof(*chksum_cb));
index 6e3d81969a77cc895580f79fa8e3aaa3b8bb4fee..d52265416da2af0da11cca770304f33ab203ad20 100644 (file)
@@ -1725,6 +1725,7 @@ struct aac_dev
 #define FIB_CONTEXT_FLAG_NATIVE_HBA            (0x00000010)
 #define FIB_CONTEXT_FLAG_NATIVE_HBA_TMF        (0x00000020)
 #define FIB_CONTEXT_FLAG_SCSI_CMD      (0x00000040)
+#define FIB_CONTEXT_FLAG_EH_RESET      (0x00000080)
 
 /*
  *     Define the command values
index bdf127aaab41d814e2337d2944166a0498bf1a66..d55332de08f91ad8e54e1296867569a8fa109a34 100644 (file)
@@ -1037,7 +1037,7 @@ static int aac_eh_bus_reset(struct scsi_cmnd* cmd)
                        info = &aac->hba_map[bus][cid];
                        if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS ||
                            info->devtype != AAC_DEVTYPE_NATIVE_RAW) {
-                               fib->flags |= FIB_CONTEXT_FLAG_TIMED_OUT;
+                               fib->flags |= FIB_CONTEXT_FLAG_EH_RESET;
                                cmd->SCp.phase = AAC_OWNER_ERROR_HANDLER;
                        }
                }
index a4f28b7e4c65df81ef583eab878a3aa9fc45e0e4..e18877177f1b52d9c43ad3b991b858c80a6cc079 100644 (file)
@@ -1576,7 +1576,9 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
                return req;
 
        for_each_bio(bio) {
-               ret = blk_rq_append_bio(req, bio);
+               struct bio *bounce_bio = bio;
+
+               ret = blk_rq_append_bio(req, &bounce_bio);
                if (ret)
                        return ERR_PTR(ret);
        }
index 449ef5adbb2bc3719b3ba72953ebbecef9e4d086..dfb8da83fa504c979e9ba0639b32a4cae2c8969c 100644 (file)
@@ -374,10 +374,8 @@ int scsi_dev_info_list_add_keyed(int compatible, char *vendor, char *model,
                            model, compatible);
 
        if (strflags)
-               devinfo->flags = simple_strtoul(strflags, NULL, 0);
-       else
-               devinfo->flags = flags;
-
+               flags = (__force blist_flags_t)simple_strtoul(strflags, NULL, 0);
+       devinfo->flags = flags;
        devinfo->compatible = compatible;
 
        if (compatible)
index be5e919db0e8cd9e713727a91bc46923673ea556..0880d975eed3a56c58d27172bfd18c1a59da5d4b 100644 (file)
@@ -770,7 +770,7 @@ static int scsi_probe_lun(struct scsi_device *sdev, unsigned char *inq_result,
  *     SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
-               int *bflags, int async)
+               blist_flags_t *bflags, int async)
 {
        int ret;
 
@@ -1049,14 +1049,15 @@ static unsigned char *scsi_inq_str(unsigned char *buf, unsigned char *inq,
  *   - SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
  **/
 static int scsi_probe_and_add_lun(struct scsi_target *starget,
-                                 u64 lun, int *bflagsp,
+                                 u64 lun, blist_flags_t *bflagsp,
                                  struct scsi_device **sdevp,
                                  enum scsi_scan_mode rescan,
                                  void *hostdata)
 {
        struct scsi_device *sdev;
        unsigned char *result;
-       int bflags, res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
+       blist_flags_t bflags;
+       int res = SCSI_SCAN_NO_RESPONSE, result_len = 256;
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 
        /*
@@ -1201,7 +1202,7 @@ static int scsi_probe_and_add_lun(struct scsi_target *starget,
  *     Modifies sdevscan->lun.
  **/
 static void scsi_sequential_lun_scan(struct scsi_target *starget,
-                                    int bflags, int scsi_level,
+                                    blist_flags_t bflags, int scsi_level,
                                     enum scsi_scan_mode rescan)
 {
        uint max_dev_lun;
@@ -1292,7 +1293,7 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
  *     0: scan completed (or no memory, so further scanning is futile)
  *     1: could not scan with REPORT LUN
  **/
-static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
+static int scsi_report_lun_scan(struct scsi_target *starget, blist_flags_t bflags,
                                enum scsi_scan_mode rescan)
 {
        unsigned char scsi_cmd[MAX_COMMAND_SIZE];
@@ -1538,7 +1539,7 @@ static void __scsi_scan_target(struct device *parent, unsigned int channel,
                unsigned int id, u64 lun, enum scsi_scan_mode rescan)
 {
        struct Scsi_Host *shost = dev_to_shost(parent);
-       int bflags = 0;
+       blist_flags_t bflags = 0;
        int res;
        struct scsi_target *starget;
 
index 50e7d7e4a86179b9a47d18569bb759be0b674b88..26ce17178401b645bd9548e49dd11fecb3babf8c 100644 (file)
@@ -967,7 +967,8 @@ sdev_show_wwid(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(wwid, S_IRUGO, sdev_show_wwid, NULL);
 
-#define BLIST_FLAG_NAME(name) [ilog2(BLIST_##name)] = #name
+#define BLIST_FLAG_NAME(name)                                  \
+       [ilog2((__force unsigned int)BLIST_##name)] = #name
 static const char *const sdev_bflags_name[] = {
 #include "scsi_devinfo_tbl.c"
 };
@@ -984,7 +985,7 @@ sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
        for (i = 0; i < sizeof(sdev->sdev_bflags) * BITS_PER_BYTE; i++) {
                const char *name = NULL;
 
-               if (!(sdev->sdev_bflags & BIT(i)))
+               if (!(sdev->sdev_bflags & (__force blist_flags_t)BIT(i)))
                        continue;
                if (i < ARRAY_SIZE(sdev_bflags_name) && sdev_bflags_name[i])
                        name = sdev_bflags_name[i];
@@ -1414,7 +1415,10 @@ static void __scsi_remove_target(struct scsi_target *starget)
                 * check.
                 */
                if (sdev->channel != starget->channel ||
-                   sdev->id != starget->id ||
+                   sdev->id != starget->id)
+                       continue;
+               if (sdev->sdev_state == SDEV_DEL ||
+                   sdev->sdev_state == SDEV_CANCEL ||
                    !get_device(&sdev->sdev_gendev))
                        continue;
                spin_unlock_irqrestore(shost->host_lock, flags);
index d0219e36080c3b79109ac405eb0cd726545585fc..10ebb213ddb33e2920e2fe83e60cc712a50c3002 100644 (file)
 
 /* Our blacklist flags */
 enum {
-       SPI_BLIST_NOIUS = 0x1,
+       SPI_BLIST_NOIUS = (__force blist_flags_t)0x1,
 };
 
 /* blacklist table, modelled on scsi_devinfo.c */
 static struct {
        char *vendor;
        char *model;
-       unsigned flags;
+       blist_flags_t flags;
 } spi_static_device_list[] __initdata = {
        {"HP", "Ultrium 3-SCSI", SPI_BLIST_NOIUS },
        {"IBM", "ULTRIUM-TD3", SPI_BLIST_NOIUS },
@@ -221,9 +221,11 @@ static int spi_device_configure(struct transport_container *tc,
 {
        struct scsi_device *sdev = to_scsi_device(dev);
        struct scsi_target *starget = sdev->sdev_target;
-       unsigned bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
-                                                     &sdev->inquiry[16],
-                                                     SCSI_DEVINFO_SPI);
+       blist_flags_t bflags;
+
+       bflags = scsi_get_device_flags_keyed(sdev, &sdev->inquiry[8],
+                                            &sdev->inquiry[16],
+                                            SCSI_DEVINFO_SPI);
 
        /* Populate the target capability fields with the values
         * gleaned from the device inquiry */
index 1b06cf0375dcdbd6f1780ab7535bbaa4c5742916..3b3d1d050cacaa3d83dc615e29691ccc2c5de87f 100644 (file)
@@ -953,10 +953,11 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
                case TEST_UNIT_READY:
                        break;
                default:
-                       set_host_byte(scmnd, DID_TARGET_FAILURE);
+                       set_host_byte(scmnd, DID_ERROR);
                }
                break;
        case SRB_STATUS_INVALID_LUN:
+               set_host_byte(scmnd, DID_NO_CONNECT);
                do_work = true;
                process_err_fn = storvsc_remove_lun;
                break;
index 77fe55ce790c61a8835c4e2338a36be43dafcbac..d65345312527ce450b539964aa0465e1e6787b44 100644 (file)
@@ -79,6 +79,7 @@
 #define A3700_SPI_BYTE_LEN             BIT(5)
 #define A3700_SPI_CLK_PRESCALE         BIT(0)
 #define A3700_SPI_CLK_PRESCALE_MASK    (0x1f)
+#define A3700_SPI_CLK_EVEN_OFFS                (0x10)
 
 #define A3700_SPI_WFIFO_THRS_BIT       28
 #define A3700_SPI_RFIFO_THRS_BIT       24
@@ -220,6 +221,13 @@ static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
 
        prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
 
+       /* Prescaler values over 15 can only be programmed in steps of 2:
+        * starting at A3700_SPI_CLK_EVEN_OFFS the register encodes even
+        * values from 0 up to 30, of which only the range 16 to 30 is used.
+        */
+       if (prescale > 15)
+               prescale = A3700_SPI_CLK_EVEN_OFFS + DIV_ROUND_UP(prescale, 2);
+
        val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
        val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
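
A quick check of the even-divisor encoding with concrete numbers, assuming
a 160 MHz input clock: for speed_hz = 10 MHz, prescale = DIV_ROUND_UP(160,
10) = 16, which exceeds 15 and is re-encoded as A3700_SPI_CLK_EVEN_OFFS +
DIV_ROUND_UP(16, 2) = 0x10 + 8 = 0x18. Per the comment above, register
values at or past the 0x10 offset select even divisors in steps of 2, so
the effective divisor is 2 * 8 = 16, as intended.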
 
index f95da364c2832b0142158e12c97648aab81b7165..66947097102370d0f54ba2559abddab1ac812a5d 100644 (file)
@@ -1661,12 +1661,12 @@ static int atmel_spi_remove(struct platform_device *pdev)
        pm_runtime_get_sync(&pdev->dev);
 
        /* reset the hardware and block queue progress */
-       spin_lock_irq(&as->lock);
        if (as->use_dma) {
                atmel_spi_stop_dma(master);
                atmel_spi_release_dma(master);
        }
 
+       spin_lock_irq(&as->lock);
        spi_writel(as, CR, SPI_BIT(SWRST));
        spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
        spi_readl(as, SR);
index 2ce875764ca646a2bdfb803cae33465ab8fa1786..0835a8d88fb8f85ab5ae44a4aa74d94121d19d87 100644 (file)
@@ -377,8 +377,8 @@ static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
        /* Sets SPCMD */
        rspi_write16(rspi, rspi->spcmd, RSPI_SPCMD0);
 
-       /* Enables SPI function in master mode */
-       rspi_write8(rspi, SPCR_SPE | SPCR_MSTR, RSPI_SPCR);
+       /* Sets RSPI mode */
+       rspi_write8(rspi, SPCR_MSTR, RSPI_SPCR);
 
        return 0;
 }
index c5cd635c28f388bec2cfd47b9a6c6c9dcec9e046..41410031f8e99e6a1d54b8f94990df0133356ced 100644 (file)
@@ -525,7 +525,7 @@ err_free_master:
 
 static int sun4i_spi_remove(struct platform_device *pdev)
 {
-       pm_runtime_disable(&pdev->dev);
+       pm_runtime_force_suspend(&pdev->dev);
 
        return 0;
 }
index bc7100b93dfcf0c24213f479f9a5fffc41666315..e0b9fe1d0e37d98a7243ca35a56b1d62e024e8b3 100644 (file)
@@ -271,6 +271,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
        while (remaining_words) {
                int n_words, tx_words, rx_words;
                u32 sr;
+               int stalled;
 
                n_words = min(remaining_words, xspi->buffer_size);
 
@@ -299,7 +300,17 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t)
 
                /* Read out all the data from the Rx FIFO */
                rx_words = n_words;
+               stalled = 10;
                while (rx_words) {
+                       if (rx_words == n_words && !(stalled--) &&
+                           !(sr & XSPI_SR_TX_EMPTY_MASK) &&
+                           (sr & XSPI_SR_RX_EMPTY_MASK)) {
+                               dev_err(&spi->dev,
+                                       "Detected stall. Check C_SPI_MODE and C_SPI_MEMORY\n");
+                               xspi_init_hw(xspi);
+                               return -EIO;
+                       }
+
                        if ((sr & XSPI_SR_TX_EMPTY_MASK) && (rx_words > 1)) {
                                xilinx_spi_rx(xspi);
                                rx_words--;
index 0f695df14c9d8f0a5c0a95ea8c28dc075c2aa36c..372ce9913e6dea373d4cdd06ca51f2419c7a7373 100644 (file)
@@ -765,10 +765,12 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                break;
        case ASHMEM_SET_SIZE:
                ret = -EINVAL;
+               mutex_lock(&ashmem_mutex);
                if (!asma->file) {
                        ret = 0;
                        asma->size = (size_t)arg;
                }
+               mutex_unlock(&ashmem_mutex);
                break;
        case ASHMEM_GET_SIZE:
                ret = asma->size;
index a517b2d29f1bb6efce2f22a5520a009c1375bcfb..8f6494158d3d018b847ad989f6fa1c2648848094 100644 (file)
@@ -37,7 +37,7 @@ config ION_CHUNK_HEAP
 
 config ION_CMA_HEAP
        bool "Ion CMA heap support"
-       depends on ION && CMA
+       depends on ION && DMA_CMA
        help
          Choose this option to enable CMA heaps with Ion. This heap is backed
          by the Contiguous Memory Allocator (CMA). If your system has these
index a7d9b0e9857225abf7a6c88885d15e76afc6627b..f480885e346b69cb6ec881abefca0ee3aaf9fd15 100644 (file)
@@ -346,7 +346,7 @@ static int ion_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
        mutex_lock(&buffer->lock);
        list_for_each_entry(a, &buffer->attachments, list) {
                dma_sync_sg_for_cpu(a->dev, a->table->sgl, a->table->nents,
-                                   DMA_BIDIRECTIONAL);
+                                   direction);
        }
        mutex_unlock(&buffer->lock);
 
@@ -368,7 +368,7 @@ static int ion_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
        mutex_lock(&buffer->lock);
        list_for_each_entry(a, &buffer->attachments, list) {
                dma_sync_sg_for_device(a->dev, a->table->sgl, a->table->nents,
-                                      DMA_BIDIRECTIONAL);
+                                      direction);
        }
        mutex_unlock(&buffer->lock);
 
index dd5545d9990a0f61ac2a1ba6249ec851602d1b3b..86196ffd2faf9a7733cd931fa48e24e5ace3fd22 100644 (file)
@@ -39,9 +39,15 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
        struct ion_cma_heap *cma_heap = to_cma_heap(heap);
        struct sg_table *table;
        struct page *pages;
+       unsigned long size = PAGE_ALIGN(len);
+       unsigned long nr_pages = size >> PAGE_SHIFT;
+       unsigned long align = get_order(size);
        int ret;
 
-       pages = cma_alloc(cma_heap->cma, len, 0, GFP_KERNEL);
+       if (align > CONFIG_CMA_ALIGNMENT)
+               align = CONFIG_CMA_ALIGNMENT;
+
+       pages = cma_alloc(cma_heap->cma, nr_pages, align, GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
 
@@ -53,7 +59,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
        if (ret)
                goto free_mem;
 
-       sg_set_page(table->sgl, pages, len, 0);
+       sg_set_page(table->sgl, pages, size, 0);
 
        buffer->priv_virt = pages;
        buffer->sg_table = table;
@@ -62,7 +68,7 @@ static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
 free_mem:
        kfree(table);
 err:
-       cma_release(cma_heap->cma, pages, buffer->size);
+       cma_release(cma_heap->cma, pages, nr_pages);
        return -ENOMEM;
 }
 
@@ -70,9 +76,10 @@ static void ion_cma_free(struct ion_buffer *buffer)
 {
        struct ion_cma_heap *cma_heap = to_cma_heap(buffer->heap);
        struct page *pages = buffer->priv_virt;
+       unsigned long nr_pages = PAGE_ALIGN(buffer->size) >> PAGE_SHIFT;
 
        /* release memory */
-       cma_release(cma_heap->cma, pages, buffer->size);
+       cma_release(cma_heap->cma, pages, nr_pages);
        /* release sg table */
        sg_free_table(buffer->sg_table);
        kfree(buffer->sg_table);
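
The core of this fix is a unit change: cma_alloc() and cma_release() count
pages (and take an allocation order for alignment), while the heap's len and
buffer->size are bytes. A minimal arithmetic sketch with 4 KiB pages:

	unsigned long size = PAGE_ALIGN(len);		/* len = 1 MiB -> 1 MiB */
	unsigned long nr_pages = size >> PAGE_SHIFT;	/* 1 MiB / 4 KiB = 256 */
	unsigned long align = get_order(size);		/* order 8 */
	/* align is then clamped to CONFIG_CMA_ALIGNMENT when larger */
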
index 986c2a40d9780ecbbc2226d9be6fa74876e85681..8267119ccc8e73108bc8e4f2edb777861192f353 100644 (file)
@@ -487,21 +487,18 @@ ksocknal_add_peer(struct lnet_ni *ni, struct lnet_process_id id, __u32 ipaddr,
                              ksocknal_nid2peerlist(id.nid));
        }
 
-       route2 = NULL;
        list_for_each_entry(route2, &peer->ksnp_routes, ksnr_list) {
-               if (route2->ksnr_ipaddr == ipaddr)
-                       break;
-
-               route2 = NULL;
-       }
-       if (!route2) {
-               ksocknal_add_route_locked(peer, route);
-               route->ksnr_share_count++;
-       } else {
-               ksocknal_route_decref(route);
-               route2->ksnr_share_count++;
+               if (route2->ksnr_ipaddr == ipaddr) {
+                       /* Route already exists, use the old one */
+                       ksocknal_route_decref(route);
+                       route2->ksnr_share_count++;
+                       goto out;
+               }
        }
-
+       /* Route doesn't already exist, add the new one */
+       ksocknal_add_route_locked(peer, route);
+       route->ksnr_share_count++;
+out:
        write_unlock_bh(&ksocknal_data.ksnd_global_lock);
 
        return 0;
index 7c69b4a9694d2016a8aac3b63a4b7d4399146688..0d99b242e82e3f84da25a47564f96db60be4b5f5 100644 (file)
@@ -920,7 +920,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                        " %d i: %d bio: %p, allocating another"
                                        " bio\n", bio->bi_vcnt, i, bio);
 
-                               rc = blk_rq_append_bio(req, bio);
+                               rc = blk_rq_append_bio(req, &bio);
                                if (rc) {
                                        pr_err("pSCSI: failed to append bio\n");
                                        goto fail;
@@ -938,7 +938,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        }
 
        if (bio) {
-               rc = blk_rq_append_bio(req, bio);
+               rc = blk_rq_append_bio(req, &bio);
                if (rc) {
                        pr_err("pSCSI: failed to append bio\n");
                        goto fail;
index 419a7a90bce0e21c40afc165506420aa9146fb72..f45bcbc63738ffb3598e958e1af529c443b98e1d 100644 (file)
@@ -339,7 +339,7 @@ static void __ring_interrupt(struct tb_ring *ring)
                return;
 
        if (ring->start_poll) {
-               __ring_interrupt_mask(ring, false);
+               __ring_interrupt_mask(ring, true);
                ring->start_poll(ring->poll_data);
        } else {
                schedule_work(&ring->work);
index 427e0d5d8f135e56249c120fdfb50aca1f65021d..539b49adb6afd41190eee97f6eaf1950c8a1ac3f 100644 (file)
@@ -1762,7 +1762,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
        struct n_tty_data *ldata = tty->disc_data;
 
-       if (!old || (old->c_lflag ^ tty->termios.c_lflag) & ICANON) {
+       if (!old || (old->c_lflag ^ tty->termios.c_lflag) & (ICANON | EXTPROC)) {
                bitmap_zero(ldata->read_flags, N_TTY_BUF_SIZE);
                ldata->line_start = ldata->read_tail;
                if (!L_ICANON(tty) || !read_cnt(ldata)) {
@@ -2425,7 +2425,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
                return put_user(tty_chars_in_buffer(tty), (int __user *) arg);
        case TIOCINQ:
                down_write(&tty->termios_rwsem);
-               if (L_ICANON(tty))
+               if (L_ICANON(tty) && !L_EXTPROC(tty))
                        retval = inq_canon(ldata);
                else
                        retval = read_cnt(ldata);
index 3593ce0ec641d848f1a15438ffb76295558ab9a6..880009987460affefa3aac2bf61aee6174195f81 100644 (file)
@@ -247,7 +247,7 @@ static int ci_hdrc_msm_probe(struct platform_device *pdev)
        if (ret)
                goto err_mux;
 
-       ulpi_node = of_find_node_by_name(of_node_get(pdev->dev.of_node), "ulpi");
+       ulpi_node = of_get_child_by_name(pdev->dev.of_node, "ulpi");
        if (ulpi_node) {
                phy_node = of_get_next_available_child(ulpi_node, NULL);
                ci->hsic = of_device_is_compatible(phy_node, "qcom,usb-hsic-phy");
index 78e92d29f8d98777c1294292808b1a868dcfcb7f..c821b4b9647e357468335fca83b3aa980e600315 100644 (file)
@@ -1007,7 +1007,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
                case USB_SSP_CAP_TYPE:
                        ssp_cap = (struct usb_ssp_cap_descriptor *)buffer;
                        ssac = (le32_to_cpu(ssp_cap->bmAttributes) &
-                               USB_SSP_SUBLINK_SPEED_ATTRIBS) + 1;
+                               USB_SSP_SUBLINK_SPEED_ATTRIBS);
                        if (length >= USB_DT_USB_SSP_CAP_SIZE(ssac))
                                dev->bos->ssp_cap = ssp_cap;
                        break;
index a10b346b9777dba58abe8346cb4926e42d8bb7aa..4024926c1d68c93e97e40f822c4a690a4c113987 100644 (file)
@@ -52,10 +52,11 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* Microsoft LifeCam-VX700 v2.0 */
        { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
 
-       /* Logitech HD Pro Webcams C920, C920-C and C930e */
+       /* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
        { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
        { USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
        { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT },
+       { USB_DEVICE(0x046d, 0x085b), .driver_info = USB_QUIRK_DELAY_INIT },
 
        /* Logitech ConferenceCam CC3000e */
        { USB_DEVICE(0x046d, 0x0847), .driver_info = USB_QUIRK_DELAY_INIT },
@@ -149,6 +150,9 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* Genesys Logic hub, internally used by KY-688 USB 3.1 Type-C Hub */
        { USB_DEVICE(0x05e3, 0x0612), .driver_info = USB_QUIRK_NO_LPM },
 
+       /* ELSA MicroLink 56K */
+       { USB_DEVICE(0x05cc, 0x2267), .driver_info = USB_QUIRK_RESET_RESUME },
+
        /* Genesys Logic hub, internally used by Moshi USB to Ethernet Adapter */
        { USB_DEVICE(0x05e3, 0x0616), .driver_info = USB_QUIRK_NO_LPM },
 
index 93eff7dec2f5e9d7d9449b1189f8e5d2c91728e6..1b3efb14aec7878e616a3a1a7588046e2641aec7 100644 (file)
@@ -1147,11 +1147,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        udc = kzalloc(sizeof(*udc), GFP_KERNEL);
        if (!udc)
-               goto err1;
-
-       ret = device_add(&gadget->dev);
-       if (ret)
-               goto err2;
+               goto err_put_gadget;
 
        device_initialize(&udc->dev);
        udc->dev.release = usb_udc_release;
@@ -1160,7 +1156,11 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
        udc->dev.parent = parent;
        ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
        if (ret)
-               goto err3;
+               goto err_put_udc;
+
+       ret = device_add(&gadget->dev);
+       if (ret)
+               goto err_put_udc;
 
        udc->gadget = gadget;
        gadget->udc = udc;
@@ -1170,7 +1170,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
 
        ret = device_add(&udc->dev);
        if (ret)
-               goto err4;
+               goto err_unlist_udc;
 
        usb_gadget_set_state(gadget, USB_STATE_NOTATTACHED);
        udc->vbus = true;
@@ -1178,27 +1178,25 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
        /* pick up one of pending gadget drivers */
        ret = check_pending_gadget_drivers(udc);
        if (ret)
-               goto err5;
+               goto err_del_udc;
 
        mutex_unlock(&udc_lock);
 
        return 0;
 
-err5:
+ err_del_udc:
        device_del(&udc->dev);
 
-err4:
+ err_unlist_udc:
        list_del(&udc->list);
        mutex_unlock(&udc_lock);
 
-err3:
-       put_device(&udc->dev);
        device_del(&gadget->dev);
 
-err2:
-       kfree(udc);
+ err_put_udc:
+       put_device(&udc->dev);
 
-err1:
+ err_put_gadget:
        put_device(&gadget->dev);
        return ret;
 }
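
Besides giving the labels readable names, the reordered error path fixes an
object-lifetime rule: once device_initialize() has run, a struct device must
be released with put_device(), never kfree(), because the release callback
(usb_udc_release here) owns the free. A minimal sketch of the rule:

	device_initialize(&udc->dev);	/* from now on, unwind via put_device() */
	ret = dev_set_name(&udc->dev, "%s", kobject_name(&parent->kobj));
	if (ret)
		goto err_put_udc;
	/* ... */
 err_put_udc:
	put_device(&udc->dev);		/* release callback frees udc */
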
index 4f7895dbcf880ef2a1e3b863b7002825e3cb752c..e26e685d8a578fddffaa7ff220a0c4442ba4f5ce 100644 (file)
@@ -162,7 +162,7 @@ static void xhci_debugfs_extcap_regset(struct xhci_hcd *xhci, int cap_id,
 static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 {
        dma_addr_t              dma;
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        dma = xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue);
        seq_printf(s, "%pad\n", &dma);
@@ -173,7 +173,7 @@ static int xhci_ring_enqueue_show(struct seq_file *s, void *unused)
 static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 {
        dma_addr_t              dma;
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        dma = xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue);
        seq_printf(s, "%pad\n", &dma);
@@ -183,7 +183,7 @@ static int xhci_ring_dequeue_show(struct seq_file *s, void *unused)
 
 static int xhci_ring_cycle_show(struct seq_file *s, void *unused)
 {
-       struct xhci_ring        *ring = s->private;
+       struct xhci_ring        *ring = *(struct xhci_ring **)s->private;
 
        seq_printf(s, "%d\n", ring->cycle_state);
 
@@ -346,7 +346,7 @@ static void xhci_debugfs_create_files(struct xhci_hcd *xhci,
 }
 
 static struct dentry *xhci_debugfs_create_ring_dir(struct xhci_hcd *xhci,
-                                                  struct xhci_ring *ring,
+                                                  struct xhci_ring **ring,
                                                   const char *name,
                                                   struct dentry *parent)
 {
@@ -387,7 +387,7 @@ void xhci_debugfs_create_endpoint(struct xhci_hcd *xhci,
 
        snprintf(epriv->name, sizeof(epriv->name), "ep%02d", ep_index);
        epriv->root = xhci_debugfs_create_ring_dir(xhci,
-                                                  dev->eps[ep_index].new_ring,
+                                                  &dev->eps[ep_index].new_ring,
                                                   epriv->name,
                                                   spriv->root);
        spriv->eps[ep_index] = epriv;
@@ -423,7 +423,7 @@ void xhci_debugfs_create_slot(struct xhci_hcd *xhci, int slot_id)
        priv->dev = dev;
        dev->debugfs_private = priv;
 
-       xhci_debugfs_create_ring_dir(xhci, dev->eps[0].ring,
+       xhci_debugfs_create_ring_dir(xhci, &dev->eps[0].ring,
                                     "ep00", priv->root);
 
        xhci_debugfs_create_context_files(xhci, priv->root, slot_id);
@@ -488,11 +488,11 @@ void xhci_debugfs_init(struct xhci_hcd *xhci)
                                   ARRAY_SIZE(xhci_extcap_dbc),
                                   "reg-ext-dbc");
 
-       xhci_debugfs_create_ring_dir(xhci, xhci->cmd_ring,
+       xhci_debugfs_create_ring_dir(xhci, &xhci->cmd_ring,
                                     "command-ring",
                                     xhci->debugfs_root);
 
-       xhci_debugfs_create_ring_dir(xhci, xhci->event_ring,
+       xhci_debugfs_create_ring_dir(xhci, &xhci->event_ring,
                                     "event-ring",
                                     xhci->debugfs_root);
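
Every call site now passes the address of the ring pointer rather than the
ring itself, so each show() routine dereferences at read time:

	struct xhci_ring *ring = *(struct xhci_ring **)s->private;

Presumably this is because a ring can be replaced after the debugfs file is
created (note dev->eps[ep_index].new_ring above); reading through the
current pointer avoids dumping a stale, freed ring.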
 
index 7ef1274ef7f7f245a03e539982da07f6242343b1..1aad89b8aba0b5b5b4610aa161bc7379ba45fbb7 100644 (file)
@@ -177,6 +177,9 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                xhci->quirks |= XHCI_TRUST_TX_LENGTH;
                xhci->quirks |= XHCI_BROKEN_STREAMS;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
+                       pdev->device == 0x0014)
+               xhci->quirks |= XHCI_TRUST_TX_LENGTH;
        if (pdev->vendor == PCI_VENDOR_ID_RENESAS &&
                        pdev->device == 0x0015)
                xhci->quirks |= XHCI_RESET_ON_RESUME;
index 2424d3020ca364b22792376e36c21462af3b2f62..da6dbe3ebd8be9c8f137c6f500663be857184080 100644 (file)
@@ -3525,8 +3525,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
        struct xhci_slot_ctx *slot_ctx;
        int i, ret;
 
-       xhci_debugfs_remove_slot(xhci, udev->slot_id);
-
 #ifndef CONFIG_USB_DEFAULT_PERSIST
        /*
         * We called pm_runtime_get_noresume when the device was attached.
@@ -3555,8 +3553,10 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev)
        }
 
        ret = xhci_disable_slot(xhci, udev->slot_id);
-       if (ret)
+       if (ret) {
+               xhci_debugfs_remove_slot(xhci, udev->slot_id);
                xhci_free_virt_device(xhci, udev->slot_id);
+       }
 }
 
 int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id)
index 465dbf68b4633d438e858260d5db12d6bc0ce44e..f723f7b8c9ac4cdb65e5beb9a401ac3ce64f99e2 100644 (file)
@@ -279,6 +279,8 @@ static int usb3503_probe(struct usb3503 *hub)
        if (gpio_is_valid(hub->gpio_reset)) {
                err = devm_gpio_request_one(dev, hub->gpio_reset,
                                GPIOF_OUT_INIT_LOW, "usb3503 reset");
+               /* Datasheet defines a hardware reset to be at least 100us */
+               usleep_range(100, 10000);
                if (err) {
                        dev_err(dev,
                                "unable to request GPIO %d as reset pin (%d)\n",
index f6ae753ab99b0998fcc99463fd50971f658dfaf0..f932f40302df942b744353f0db738edba88c553f 100644 (file)
@@ -1004,7 +1004,9 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
                break;
 
        case MON_IOCQ_RING_SIZE:
+               mutex_lock(&rp->fetch_lock);
                ret = rp->b_size;
+               mutex_unlock(&rp->fetch_lock);
                break;
 
        case MON_IOCT_RING_SIZE:
@@ -1231,12 +1233,16 @@ static int mon_bin_vma_fault(struct vm_fault *vmf)
        unsigned long offset, chunk_idx;
        struct page *pageptr;
 
+       mutex_lock(&rp->fetch_lock);
        offset = vmf->pgoff << PAGE_SHIFT;
-       if (offset >= rp->b_size)
+       if (offset >= rp->b_size) {
+               mutex_unlock(&rp->fetch_lock);
                return VM_FAULT_SIGBUS;
+       }
        chunk_idx = offset / CHUNK_SIZE;
        pageptr = rp->b_vec[chunk_idx].pg;
        get_page(pageptr);
+       mutex_unlock(&rp->fetch_lock);
        vmf->page = pageptr;
        return 0;
 }
index 7c6273bf5bebcfff7b573c23659ef2da219bd29d..06d502b3e91344c809523f67cf75cd364dc2cbed 100644 (file)
@@ -124,6 +124,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
        { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
        { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+       { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
        { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
        { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
        { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -174,6 +175,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
        { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+       { USB_DEVICE(0x18EF, 0xE030) }, /* ELV ALC 8xxx Battery Charger */
        { USB_DEVICE(0x18EF, 0xE032) }, /* ELV TFD500 Data Logger */
        { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
        { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
index 1aba9105b369678759a204780bb7b3d882b7d879..fc68952c994a5557bb8ca3e2de94f9d2c1b15791 100644 (file)
@@ -1013,6 +1013,7 @@ static const struct usb_device_id id_table_combined[] = {
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_BT_USB_PID) },
        { USB_DEVICE(CYPRESS_VID, CYPRESS_WICED_WL_USB_PID) },
+       { USB_DEVICE(AIRBUS_DS_VID, AIRBUS_DS_P8GR) },
        { }                                     /* Terminating entry */
 };
 
index 4faa09fe308ca0570cc9dcda6900ef74efa3e1d0..8b4ecd2bd297b84d5a86c9b02cd5c38d3a6e86a8 100644 (file)
 #define ICPDAS_I7561U_PID              0x0104
 #define ICPDAS_I7563U_PID              0x0105
 
+/*
+ * Airbus Defence and Space
+ */
+#define AIRBUS_DS_VID                  0x1e8e  /* Vendor ID */
+#define AIRBUS_DS_P8GR                 0x6001  /* Tetra P8GR */
+
 /*
  * RT Systems programming cables for various ham radios
  */
index 3b3513874cfd1e75a5380ee208f02c1144919cd1..b6320e3be42970c984cb86505251cde089437102 100644 (file)
@@ -233,6 +233,8 @@ static void option_instat_callback(struct urb *urb);
 /* These Quectel products use Qualcomm's vendor ID */
 #define QUECTEL_PRODUCT_UC20                   0x9003
 #define QUECTEL_PRODUCT_UC15                   0x9090
+/* These Yuga products use Qualcomm's vendor ID */
+#define YUGA_PRODUCT_CLM920_NC5                        0x9625
 
 #define QUECTEL_VENDOR_ID                      0x2c7c
 /* These Quectel products use Quectel's vendor ID */
@@ -280,6 +282,7 @@ static void option_instat_callback(struct urb *urb);
 #define TELIT_PRODUCT_LE922_USBCFG3            0x1043
 #define TELIT_PRODUCT_LE922_USBCFG5            0x1045
 #define TELIT_PRODUCT_ME910                    0x1100
+#define TELIT_PRODUCT_ME910_DUAL_MODEM         0x1101
 #define TELIT_PRODUCT_LE920                    0x1200
 #define TELIT_PRODUCT_LE910                    0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4            0x1206
@@ -645,6 +648,11 @@ static const struct option_blacklist_info telit_me910_blacklist = {
        .reserved = BIT(1) | BIT(3),
 };
 
+static const struct option_blacklist_info telit_me910_dual_modem_blacklist = {
+       .sendsetup = BIT(0),
+       .reserved = BIT(3),
+};
+
 static const struct option_blacklist_info telit_le910_blacklist = {
        .sendsetup = BIT(0),
        .reserved = BIT(1) | BIT(2),
@@ -674,6 +682,10 @@ static const struct option_blacklist_info cinterion_rmnet2_blacklist = {
        .reserved = BIT(4) | BIT(5),
 };
 
+static const struct option_blacklist_info yuga_clm920_nc5_blacklist = {
+       .reserved = BIT(1) | BIT(4),
+};
+
 static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) },
        { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) },
@@ -1178,6 +1190,9 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)},
        { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+       /* Yuga products use Qualcomm vendor ID */
+       { USB_DEVICE(QUALCOMM_VENDOR_ID, YUGA_PRODUCT_CLM920_NC5),
+         .driver_info = (kernel_ulong_t)&yuga_clm920_nc5_blacklist },
        /* Quectel products using Quectel vendor ID */
        { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21),
          .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1244,6 +1259,8 @@ static const struct usb_device_id option_ids[] = {
                .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
                .driver_info = (kernel_ulong_t)&telit_me910_blacklist },
+       { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
+               .driver_info = (kernel_ulong_t)&telit_me910_dual_modem_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
                .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
        { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4),
index e3892541a489940f58bfa8f534ff0e765fc527f4..613f91add03da189c0fd5334cbccbbf720060079 100644 (file)
@@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = {
        {DEVICE_SWI(0x1199, 0x9079)},   /* Sierra Wireless EM74xx */
        {DEVICE_SWI(0x1199, 0x907a)},   /* Sierra Wireless EM74xx QDL */
        {DEVICE_SWI(0x1199, 0x907b)},   /* Sierra Wireless EM74xx */
+       {DEVICE_SWI(0x1199, 0x9090)},   /* Sierra Wireless EM7565 QDL */
+       {DEVICE_SWI(0x1199, 0x9091)},   /* Sierra Wireless EM7565 */
        {DEVICE_SWI(0x413c, 0x81a2)},   /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a3)},   /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
        {DEVICE_SWI(0x413c, 0x81a4)},   /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
@@ -342,6 +344,7 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
                        break;
                case 2:
                        dev_dbg(dev, "NMEA GPS interface found\n");
+                       sendsetup = true;
                        break;
                case 3:
                        dev_dbg(dev, "Modem port found\n");
index e6127fb21c123f397099dcae0ba5947ca7772061..a7d08ae0adaddc257c6399f0f8c5897fc16e10ff 100644 (file)
@@ -143,6 +143,13 @@ UNUSUAL_DEV(0x2109, 0x0711, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_NO_ATA_1X),
 
+/* Reported-by: Icenowy Zheng <icenowy@aosc.io> */
+UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999,
+               "Norelsys",
+               "NS1068X",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_IGNORE_UAS),
+
 /* Reported-by: Takeo Nakayama <javhera@gmx.com> */
 UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999,
                "JMicron",
index a3df8ee82faff77d9c114ed605412ccfb0704ce6..e31a6f204397dba47b3356ec3bbfe420539dc05b 100644 (file)
@@ -149,8 +149,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
         * step 1?
         */
        if (ud->tcp_socket) {
-               dev_dbg(&sdev->udev->dev, "shutdown tcp_socket %p\n",
-                       ud->tcp_socket);
+               dev_dbg(&sdev->udev->dev, "shutdown sockfd %d\n", ud->sockfd);
                kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
        }
 
index 4f48b306713f1a62942905122c9164beb134ce5f..c31c8402a0c55ddd2f4b463ebabd28be6438f490 100644 (file)
@@ -237,11 +237,12 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
        struct stub_priv *priv;
        struct urb *urb;
 
-       dev_dbg(&sdev->udev->dev, "free sdev %p\n", sdev);
+       dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n");
 
        while ((priv = stub_priv_pop(sdev))) {
                urb = priv->urb;
-               dev_dbg(&sdev->udev->dev, "free urb %p\n", urb);
+               dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n",
+                       priv->seqnum);
                usb_kill_urb(urb);
 
                kmem_cache_free(stub_priv_cache, priv);
index 493ac2928391accc4984e3ceddbfdadd2690a26c..6c5a593139996fe11be9c2bf86eb8cc19a390ef1 100644 (file)
@@ -211,9 +211,6 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
                if (priv->seqnum != pdu->u.cmd_unlink.seqnum)
                        continue;
 
-               dev_info(&priv->urb->dev->dev, "unlink urb %p\n",
-                        priv->urb);
-
                /*
                 * This matched urb is not completed yet (i.e., be in
                 * flight in usb hcd hardware/driver). Now we are
@@ -252,8 +249,8 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev,
                ret = usb_unlink_urb(priv->urb);
                if (ret != -EINPROGRESS)
                        dev_err(&priv->urb->dev->dev,
-                               "failed to unlink a urb %p, ret %d\n",
-                               priv->urb, ret);
+                               "failed to unlink urb # %lu, ret %d\n",
+                               priv->seqnum, ret);
 
                return 0;
        }
@@ -342,14 +339,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu)
 
        epd = &ep->desc;
 
-       /* validate transfer_buffer_length */
-       if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) {
-               dev_err(&sdev->udev->dev,
-                       "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n",
-                       pdu->u.cmd_submit.transfer_buffer_length);
-               return -1;
-       }
-
        if (usb_endpoint_xfer_control(epd)) {
                if (dir == USBIP_DIR_OUT)
                        return usb_sndctrlpipe(udev, epnum);
@@ -482,8 +471,7 @@ static void stub_recv_cmd_submit(struct stub_device *sdev,
        }
 
        /* allocate urb transfer buffer, if needed */
-       if (pdu->u.cmd_submit.transfer_buffer_length > 0 &&
-           pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) {
+       if (pdu->u.cmd_submit.transfer_buffer_length > 0) {
                priv->urb->transfer_buffer =
                        kzalloc(pdu->u.cmd_submit.transfer_buffer_length,
                                GFP_KERNEL);
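
The two checks dropped above compared a wire-supplied length against INT_MAX, which can never fire on the signed 32-bit field (note the %d in the removed message); the `> 0` test is what actually gates the allocation. Wherever the kernel ultimately validates this field, the general idiom for an untrusted length from the network is a sign check plus an explicit upper bound, sketched here with a hypothetical cap that is not from this patch:

#define USBIP_MAX_XFER	(1 << 20)	/* illustrative cap, not from the patch */

static void *alloc_xfer_buf(s32 len)
{
	/* reject nonsensical lengths before they reach the allocator */
	if (len <= 0 || len > USBIP_MAX_XFER)
		return NULL;
	return kzalloc(len, GFP_KERNEL);
}
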
index 53172b1f6257cf9f8d72dac57212c0ec939a8dc4..f0ec41a50cbc16f9814ca8b2a7590dbe3c465fab 100644 (file)
@@ -88,7 +88,7 @@ void stub_complete(struct urb *urb)
        /* link a urb to the queue of tx. */
        spin_lock_irqsave(&sdev->priv_lock, flags);
        if (sdev->ud.tcp_socket == NULL) {
-               usbip_dbg_stub_tx("ignore urb for closed connection %p", urb);
+               usbip_dbg_stub_tx("ignore urb for closed connection\n");
                /* It will be freed in stub_device_cleanup_urbs(). */
        } else if (priv->unlinking) {
                stub_enqueue_ret_unlink(sdev, priv->seqnum, urb->status);
@@ -190,8 +190,8 @@ static int stub_send_ret_submit(struct stub_device *sdev)
 
                /* 1. setup usbip_header */
                setup_ret_submit_pdu(&pdu_header, urb);
-               usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-                                 pdu_header.base.seqnum, urb);
+               usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+                                 pdu_header.base.seqnum);
                usbip_header_correct_endian(&pdu_header, 1);
 
                iov[iovnum].iov_base = &pdu_header;
index f7978933b40290787ded82fd7b244783610d206e..ee2bbce24584c1b28e9ea81fff476328635d7707 100644 (file)
@@ -91,7 +91,7 @@ static void usbip_dump_usb_device(struct usb_device *udev)
        dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
                udev->devnum, udev->devpath, usb_speed_string(udev->speed));
 
-       pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
+       pr_debug("tt hub ttport %d\n", udev->ttport);
 
        dev_dbg(dev, "                    ");
        for (i = 0; i < 16; i++)
@@ -124,12 +124,8 @@ static void usbip_dump_usb_device(struct usb_device *udev)
        }
        pr_debug("\n");
 
-       dev_dbg(dev, "parent %p, bus %p\n", udev->parent, udev->bus);
-
-       dev_dbg(dev,
-               "descriptor %p, config %p, actconfig %p, rawdescriptors %p\n",
-               &udev->descriptor, udev->config,
-               udev->actconfig, udev->rawdescriptors);
+       dev_dbg(dev, "parent %s, bus %s\n", dev_name(&udev->parent->dev),
+               udev->bus->bus_name);
 
        dev_dbg(dev, "have_langid %d, string_langid %d\n",
                udev->have_langid, udev->string_langid);
@@ -237,9 +233,6 @@ void usbip_dump_urb(struct urb *urb)
 
        dev = &urb->dev->dev;
 
-       dev_dbg(dev, "   urb                   :%p\n", urb);
-       dev_dbg(dev, "   dev                   :%p\n", urb->dev);
-
        usbip_dump_usb_device(urb->dev);
 
        dev_dbg(dev, "   pipe                  :%08x ", urb->pipe);
@@ -248,11 +241,9 @@ void usbip_dump_urb(struct urb *urb)
 
        dev_dbg(dev, "   status                :%d\n", urb->status);
        dev_dbg(dev, "   transfer_flags        :%08X\n", urb->transfer_flags);
-       dev_dbg(dev, "   transfer_buffer       :%p\n", urb->transfer_buffer);
        dev_dbg(dev, "   transfer_buffer_length:%d\n",
                                                urb->transfer_buffer_length);
        dev_dbg(dev, "   actual_length         :%d\n", urb->actual_length);
-       dev_dbg(dev, "   setup_packet          :%p\n", urb->setup_packet);
 
        if (urb->setup_packet && usb_pipetype(urb->pipe) == PIPE_CONTROL)
                usbip_dump_usb_ctrlrequest(
@@ -262,8 +253,6 @@ void usbip_dump_urb(struct urb *urb)
        dev_dbg(dev, "   number_of_packets     :%d\n", urb->number_of_packets);
        dev_dbg(dev, "   interval              :%d\n", urb->interval);
        dev_dbg(dev, "   error_count           :%d\n", urb->error_count);
-       dev_dbg(dev, "   context               :%p\n", urb->context);
-       dev_dbg(dev, "   complete              :%p\n", urb->complete);
 }
 EXPORT_SYMBOL_GPL(usbip_dump_urb);
 
@@ -317,26 +306,20 @@ int usbip_recv(struct socket *sock, void *buf, int size)
        struct msghdr msg = {.msg_flags = MSG_NOSIGNAL};
        int total = 0;
 
+       if (!sock || !buf || !size)
+               return -EINVAL;
+
        iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
 
        usbip_dbg_xmit("enter\n");
 
-       if (!sock || !buf || !size) {
-               pr_err("invalid arg, sock %p buff %p size %d\n", sock, buf,
-                      size);
-               return -EINVAL;
-       }
-
        do {
-               int sz = msg_data_left(&msg);
                sock->sk->sk_allocation = GFP_NOIO;
 
                result = sock_recvmsg(sock, &msg, MSG_WAITALL);
-               if (result <= 0) {
-                       pr_debug("receive sock %p buf %p size %u ret %d total %d\n",
-                                sock, buf + total, sz, result, total);
+               if (result <= 0)
                        goto err;
-               }
 
                total += result;
        } while (msg_data_left(&msg));
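
Two things change in usbip_recv() here: the argument check moves ahead of iov_iter_kvec() so a bad call never primes the iterator, and the per-iteration debug print, the only consumer of the sz variable, goes away along with it. Put back together, the receive loop now has roughly this shape (a sketch against the 4.15-era iov_iter API; the statistics bookkeeping and err label of the real function are elided):

int usbip_recv(struct socket *sock, void *buf, int size)
{
	int result, total = 0;
	struct kvec iov = { .iov_base = buf, .iov_len = size };
	struct msghdr msg = { .msg_flags = MSG_NOSIGNAL };

	if (!sock || !buf || !size)
		return -EINVAL;

	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);

	do {
		sock->sk->sk_allocation = GFP_NOIO;
		result = sock_recvmsg(sock, &msg, MSG_WAITALL);
		if (result <= 0)
			return result;		/* real code: goto err for stats */
		total += result;
	} while (msg_data_left(&msg));

	return total;
}
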
index 6b3278c4b72a0d745a724b3ef93cce0dc3dccd7d..c3e1008aa491ee45071adab6215f7440aa6dfa13 100644 (file)
@@ -656,9 +656,6 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
        struct vhci_device *vdev;
        unsigned long flags;
 
-       usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
-                         hcd, urb, mem_flags);
-
        if (portnum > VHCI_HC_PORTS) {
                pr_err("invalid port number %d\n", portnum);
                return -ENODEV;
@@ -822,8 +819,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
        struct vhci_device *vdev;
        unsigned long flags;
 
-       pr_info("dequeue a urb %p\n", urb);
-
        spin_lock_irqsave(&vhci->lock, flags);
 
        priv = urb->hcpriv;
@@ -851,7 +846,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                /* tcp connection is closed */
                spin_lock(&vdev->priv_lock);
 
-               pr_info("device %p seems to be disconnected\n", vdev);
                list_del(&priv->list);
                kfree(priv);
                urb->hcpriv = NULL;
@@ -863,8 +857,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
                 * vhci_rx will receive RET_UNLINK and give back the URB.
                 * Otherwise, we give back it here.
                 */
-               pr_info("gives back urb %p\n", urb);
-
                usb_hcd_unlink_urb_from_ep(hcd, urb);
 
                spin_unlock_irqrestore(&vhci->lock, flags);
@@ -892,8 +884,6 @@ static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 
                unlink->unlink_seqnum = priv->seqnum;
 
-               pr_info("device %p seems to be still connected\n", vdev);
-
                /* send cmd_unlink and try to cancel the pending URB in the
                 * peer */
                list_add_tail(&unlink->list, &vdev->unlink_tx);
@@ -975,7 +965,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
        /* need this? see stub_dev.c */
        if (ud->tcp_socket) {
-               pr_debug("shutdown tcp_socket %p\n", ud->tcp_socket);
+               pr_debug("shutdown sockfd %d\n", ud->sockfd);
                kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
        }
 
index 90577e8b2282393014558da959125345b1f7008c..112ebb90d8c95e0eb8ba3aa800d3b66941cb7e6c 100644 (file)
@@ -23,24 +23,23 @@ struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum)
                urb = priv->urb;
                status = urb->status;
 
-               usbip_dbg_vhci_rx("find urb %p vurb %p seqnum %u\n",
-                               urb, priv, seqnum);
+               usbip_dbg_vhci_rx("find urb seqnum %u\n", seqnum);
 
                switch (status) {
                case -ENOENT:
                        /* fall through */
                case -ECONNRESET:
-                       dev_info(&urb->dev->dev,
-                                "urb %p was unlinked %ssynchronuously.\n", urb,
-                                status == -ENOENT ? "" : "a");
+                       dev_dbg(&urb->dev->dev,
+                                "urb seq# %u was unlinked %ssynchronously\n",
+                                seqnum, status == -ENOENT ? "" : "a");
                        break;
                case -EINPROGRESS:
                        /* no info output */
                        break;
                default:
-                       dev_info(&urb->dev->dev,
-                                "urb %p may be in a error, status %d\n", urb,
-                                status);
+                       dev_dbg(&urb->dev->dev,
+                                "urb seq# %u may be in an error state, status %d\n",
+                                seqnum, status);
                }
 
                list_del(&priv->list);
@@ -67,8 +66,8 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
        spin_unlock_irqrestore(&vdev->priv_lock, flags);
 
        if (!urb) {
-               pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
-               pr_info("max seqnum %d\n",
+               pr_err("cannot find a urb of seqnum %u, max seqnum %d\n",
+                       pdu->base.seqnum,
                        atomic_read(&vhci_hcd->seqnum));
                usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
                return;
@@ -91,7 +90,7 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
        if (usbip_dbg_flag_vhci_rx)
                usbip_dump_urb(urb);
 
-       usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+       usbip_dbg_vhci_rx("now giveback urb seqnum %u\n", pdu->base.seqnum);
 
        spin_lock_irqsave(&vhci->lock, flags);
        usb_hcd_unlink_urb_from_ep(vhci_hcd_to_hcd(vhci_hcd), urb);
@@ -158,7 +157,7 @@ static void vhci_recv_ret_unlink(struct vhci_device *vdev,
                pr_info("the urb (seqnum %d) was already given back\n",
                        pdu->base.seqnum);
        } else {
-               usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
+               usbip_dbg_vhci_rx("now giveback urb seqnum %u\n", pdu->base.seqnum);
 
                /* If unlink is successful, status is -ECONNRESET */
                urb->status = pdu->u.ret_unlink.status;
index d625a2ff4b712f4112bd48b8115ea0104e005d15..9aed15a358b7b98b0fab9e6b02b6820cff9ff808 100644 (file)
@@ -69,7 +69,8 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
                memset(&msg, 0, sizeof(msg));
                memset(&iov, 0, sizeof(iov));
 
-               usbip_dbg_vhci_tx("setup txdata urb %p\n", urb);
+               usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n",
+                                 priv->seqnum);
 
                /* 1. setup usbip_header */
                setup_cmd_submit_pdu(&pdu_header, urb);
index df1e309891488eebd0e5439738140c22654cc711..1e8a23d92cb4b86bd5ae7173b6b35ac8e37392f9 100644 (file)
@@ -120,6 +120,25 @@ static int v_recv_cmd_submit(struct vudc *udc,
        urb_p->new = 1;
        urb_p->seqnum = pdu->base.seqnum;
 
+       if (urb_p->ep->type == USB_ENDPOINT_XFER_ISOC) {
+               /* validate packet size and number of packets */
+               unsigned int maxp, packets, bytes;
+
+               maxp = usb_endpoint_maxp(urb_p->ep->desc);
+               maxp *= usb_endpoint_maxp_mult(urb_p->ep->desc);
+               bytes = pdu->u.cmd_submit.transfer_buffer_length;
+               packets = DIV_ROUND_UP(bytes, maxp);
+
+               if (pdu->u.cmd_submit.number_of_packets < 0 ||
+                   pdu->u.cmd_submit.number_of_packets > packets) {
+                       dev_err(&udc->gadget.dev,
+                               "CMD_SUBMIT: isoc invalid num packets %d\n",
+                               pdu->u.cmd_submit.number_of_packets);
+                       ret = -EMSGSIZE;
+                       goto free_urbp;
+               }
+       }
+
        ret = alloc_urb_from_cmd(&urb_p->urb, pdu, urb_p->ep->type);
        if (ret) {
                usbip_event_add(&udc->ud, VUDC_EVENT_ERROR_MALLOC);
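
The isochronous check added above bounds number_of_packets by the most packets the request could possibly need: the buffer length divided, rounding up, by the endpoint's max packet size with the high-bandwidth multiplier applied. The arithmetic in isolation (plain C restatement, illustrative function name):

#include <stdbool.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/* A transfer of `bytes` bytes fits in at most DIV_ROUND_UP(bytes, maxp)
 * packets; a peer asking for more, or for a negative count, is malformed. */
static bool isoc_packets_sane(int number_of_packets,
			      unsigned int bytes, unsigned int maxp)
{
	if (maxp == 0)
		return false;	/* no valid isoc endpoint advertises maxp 0 */
	return number_of_packets >= 0 &&
	       (unsigned int)number_of_packets <= DIV_ROUND_UP(bytes, maxp);
}
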
index 1440ae0919ec7c5a5f2fc62903faa43c2aeee5ed..3ccb17c3e84060a907afb8c2914e3f69039f753d 100644 (file)
@@ -85,6 +85,13 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
        memset(&pdu_header, 0, sizeof(pdu_header));
        memset(&msg, 0, sizeof(msg));
 
+       if (urb->actual_length > 0 && !urb->transfer_buffer) {
+               dev_err(&udc->gadget.dev,
+                       "urb: actual_length %d transfer_buffer null\n",
+                       urb->actual_length);
+               return -1;
+       }
+
        if (urb_p->type == USB_ENDPOINT_XFER_ISOC)
                iovnum = 2 + urb->number_of_packets;
        else
@@ -100,8 +107,8 @@ static int v_send_ret_submit(struct vudc *udc, struct urbp *urb_p)
 
        /* 1. setup usbip_header */
        setup_ret_submit_pdu(&pdu_header, urb_p);
-       usbip_dbg_stub_tx("setup txdata seqnum: %d urb: %p\n",
-                         pdu_header.base.seqnum, urb);
+       usbip_dbg_stub_tx("setup txdata seqnum: %d\n",
+                         pdu_header.base.seqnum);
        usbip_header_correct_endian(&pdu_header, 1);
 
        iov[iovnum].iov_base = &pdu_header;
index f77e499afdddb02c2d7595459a70b15f3a8c56d5..065f0b607373402a0d4dd7520b9820503390fb0a 100644 (file)
@@ -257,10 +257,25 @@ static void release_memory_resource(struct resource *resource)
        kfree(resource);
 }
 
+/*
+ * Host memory not allocated to dom0. We can use this range for hotplug-based
+ * ballooning.
+ *
+ * It's a type-less resource. Setting IORESOURCE_MEM will make resource
+ * management algorithms (arch_remove_reservations()) look into the guest e820,
+ * which we don't want.
+ */
+static struct resource hostmem_resource = {
+       .name   = "Host RAM",
+};
+
+void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
+{}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-       struct resource *res;
-       int ret;
+       struct resource *res, *res_hostmem;
+       int ret = -ENOMEM;
 
        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
@@ -269,13 +284,42 @@ static struct resource *additional_memory_resource(phys_addr_t size)
        res->name = "System RAM";
        res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-       ret = allocate_resource(&iomem_resource, res,
-                               size, 0, -1,
-                               PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-       if (ret < 0) {
-               pr_err("Cannot allocate new System RAM resource\n");
-               kfree(res);
-               return NULL;
+       res_hostmem = kzalloc(sizeof(*res_hostmem), GFP_KERNEL);
+       if (res_hostmem) {
+               /* Try to grab a range from hostmem */
+               res_hostmem->name = "Host memory";
+               ret = allocate_resource(&hostmem_resource, res_hostmem,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+       }
+
+       if (!ret) {
+               /*
+                * tracks the portion of the guest e820 marked as UNUSABLE,
+                * no one else should try to use it.
+                * should try to use it.
+                */
+               res->start = res_hostmem->start;
+               res->end = res_hostmem->end;
+               ret = insert_resource(&iomem_resource, res);
+               if (ret < 0) {
+                       pr_err("Can't insert iomem_resource [%llx - %llx]\n",
+                               res->start, res->end);
+                       release_memory_resource(res_hostmem);
+                       res_hostmem = NULL;
+                       res->start = res->end = 0;
+               }
+       }
+
+       if (ret) {
+               ret = allocate_resource(&iomem_resource, res,
+                                       size, 0, -1,
+                                       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+               if (ret < 0) {
+                       pr_err("Cannot allocate new System RAM resource\n");
+                       kfree(res);
+                       return NULL;
+               }
        }
 
 #ifdef CONFIG_SPARSEMEM
@@ -287,6 +331,7 @@ static struct resource *additional_memory_resource(phys_addr_t size)
                        pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
                               pfn, limit);
                        release_memory_resource(res);
+                       release_memory_resource(res_hostmem);
                        return NULL;
                }
        }
@@ -765,6 +810,8 @@ static int __init balloon_init(void)
        set_online_page_callback(&xen_online_page);
        register_memory_notifier(&xen_memory_nb);
        register_sysctl_table(xen_root);
+
+       arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
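
The new allocation order in additional_memory_resource() is: first carve the range out of the type-less "Host RAM" tree (host memory the guest's e820 marks UNUSABLE) and pin it into iomem at that fixed address; only if that fails fall back to a fresh allocate_resource() from iomem as before. Compressed into one routine (a sketch with the caller owning both resource structs; releasing a successful hostmem reservation when the iomem insert fails is elided, as the comment notes):

/* Returns 0 and fills *res on success. */
static int pick_hotplug_range(struct resource *res, struct resource *host,
			      phys_addr_t size)
{
	/* 1. Preferred: a hole in unused host memory, at a fixed address. */
	if (!allocate_resource(&hostmem_resource, host, size, 0, -1,
			       PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL)) {
		res->start = host->start;
		res->end = host->end;
		if (!insert_resource(&iomem_resource, res))
			return 0;
		/* real code: release the hostmem reservation here */
	}

	/* 2. Fallback: let the iomem tree pick an address, as the old code did. */
	return allocate_resource(&iomem_resource, res, size, 0, -1,
				 PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
}
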
index 57efbd3b053b37ca43816483dd3364c3c48a761e..bd56653b9bbc2bed8e27c4909b2e8c025c4f1627 100644 (file)
@@ -380,10 +380,8 @@ static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
                }
                range = 0;
                while (range < pages) {
-                       if (map->unmap_ops[offset+range].handle == -1) {
-                               range--;
+                       if (map->unmap_ops[offset+range].handle == -1)
                                break;
-                       }
                        range++;
                }
                err = __unmap_grant_pages(map, offset, range);
@@ -1073,8 +1071,10 @@ unlock_out:
 out_unlock_put:
        mutex_unlock(&priv->lock);
 out_put_map:
-       if (use_ptemod)
+       if (use_ptemod) {
                map->vma = NULL;
+               unmap_grant_pages(map, 0, map->count);
+       }
        gntdev_put_map(priv, map);
        return err;
 }
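
Two gntdev fixes above: the error path out of the mmap code now unmaps whatever was mapped before dropping the map, and the range scan no longer decrements its counter before breaking on an already-unmapped entry, which could hand a length of -1 to __unmap_grant_pages(). The corrected scan, reduced to plain C:

/* Count the leading entries of handles[offset..offset+pages) that still
 * hold a live grant handle; -1 marks an entry already unmapped. The old
 * code decremented before breaking, so a range starting on an unmapped
 * entry came out as -1 instead of 0. */
static int live_prefix(const int *handles, int offset, int pages)
{
	int range = 0;

	while (range < pages && handles[offset + range] != -1)
		range++;
	return range;
}
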
index d1e1d8d2b9d545b00eb87085b6f941f04900cd74..4c789e61554b2d3a9d9c01c4384a5d77a2b978d4 100644 (file)
@@ -805,7 +805,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                pvcalls_exit();
                return ret;
        }
-       map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+       map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
        if (map2 == NULL) {
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
index ff8d5bf4354f306227a297ca542078580ca57e34..23c7f395d7182705d945082576ec59a15af3226f 100644 (file)
@@ -895,20 +895,38 @@ error:
  * However, if we didn't have a callback promise outstanding, or it was
  * outstanding on a different server, then it won't break it either...
  */
-static int afs_dir_remove_link(struct dentry *dentry, struct key *key)
+static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
+                              unsigned long d_version_before,
+                              unsigned long d_version_after)
 {
+       bool dir_valid;
        int ret = 0;
 
+       /* There were no intervening changes on the server if the version
+        * number we got back was incremented by exactly 1.
+        */
+       dir_valid = (d_version_after == d_version_before + 1);
+
        if (d_really_is_positive(dentry)) {
                struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-               if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
-                       kdebug("AFS_VNODE_DELETED");
-               clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
-
-               ret = afs_validate(vnode, key);
-               if (ret == -ESTALE)
+               if (dir_valid) {
+                       drop_nlink(&vnode->vfs_inode);
+                       if (vnode->vfs_inode.i_nlink == 0) {
+                               set_bit(AFS_VNODE_DELETED, &vnode->flags);
+                               clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+                       }
                        ret = 0;
+               } else {
+                       clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+
+                       if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+                               kdebug("AFS_VNODE_DELETED");
+
+                       ret = afs_validate(vnode, key);
+                       if (ret == -ESTALE)
+                               ret = 0;
+               }
                _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
        }
 
@@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        struct afs_fs_cursor fc;
        struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
        struct key *key;
+       unsigned long d_version = (unsigned long)dentry->d_fsdata;
        int ret;
 
        _enter("{%x:%u},{%pd}",
@@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
                ret = afs_end_vnode_operation(&fc);
                if (ret == 0)
-                       ret = afs_dir_remove_link(dentry, key);
+                       ret = afs_dir_remove_link(
+                               dentry, key, d_version,
+                               (unsigned long)dvnode->status.data_version);
        }
 
 error_key:
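
The observation this afs change rides on: the server bumps a directory's data version by exactly 1 per change, so if the version in the unlink reply equals the version cached on the dentry (d_fsdata) plus 1, our own unlink was the only change and the client can update the cached link count locally instead of tearing down the callback promise and revalidating. As a predicate (plain C; wraparound is not a concern across a single operation):

#include <stdbool.h>

/* True when nothing else touched the directory between our cached
 * sample of the version and the server's post-unlink reply. */
static bool dir_unchanged_by_others(unsigned long before, unsigned long after)
{
	return after == before + 1;
}

When this holds, the patch drops nlink on the victim inode and marks the vnode deleted once the count reaches zero; otherwise it falls back to the old clear-promise-and-revalidate path.
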
index 3415eb7484f6ba5653e3f08c8e3035ab29b2eaa3..1e81864ef0b29bffcc10944c30bc57796815025d 100644 (file)
@@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
        }
 
        read_sequnlock_excl(&vnode->cb_lock);
+
+       if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
+               clear_nlink(&vnode->vfs_inode);
+
        if (valid)
                goto valid;
 
index ea1460b9b71aab5900b342e28641924a0f7b9f04..e1126659f043f0c445cc19fa9524cc5d96afd36a 100644 (file)
@@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 {
        struct afs_net *net = call->net;
        enum afs_call_state state;
-       u32 remote_abort;
+       u32 remote_abort = 0;
        int ret;
 
        _enter("{%s,%zu},,%zu,%d",
index cb5f8a3df5773cba37c292e65ac6e985b0af11bc..9370e2feb999303098d36aab4234d9f2fef7f24a 100644 (file)
@@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                        ret = afs_fill_page(vnode, key, pos + copied,
                                            len - copied, page);
                        if (ret < 0)
-                               return ret;
+                               goto out;
                }
                SetPageUptodate(page);
        }
@@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,
        set_page_dirty(page);
        if (PageDirty(page))
                _debug("dirtied");
+       ret = copied;
+
+out:
        unlock_page(page);
        put_page(page);
-
-       return copied;
+       return ret;
 }
 
 /*
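
The afs_write_end() fix is the classic single-exit shape: the early return on afs_fill_page() failure used to skip unlock_page()/put_page(), leaking a locked page. Reduced to its skeleton (a sketch, not the full function):

/* Every path, success or failure, funnels through one cleanup tail. */
static int write_end_skeleton(struct page *page, int copied, int fill_err)
{
	int ret;

	if (fill_err < 0) {
		ret = fill_err;
		goto out;
	}
	SetPageUptodate(page);
	set_page_dirty(page);
	ret = copied;
out:
	unlock_page(page);
	put_page(page);
	return ret;
}
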
index 5d73f79ded8bcbd967636b652d98433da64cceb8..056276101c63a7060b09517353ba24cd00cde9a5 100644 (file)
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
 
        spin_lock(&root->inode_lock);
        node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
+
        if (node) {
                if (btrfs_inode->delayed_node) {
                        refcount_inc(&node->refs);      /* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
                        spin_unlock(&root->inode_lock);
                        return node;
                }
-               btrfs_inode->delayed_node = node;
-               /* can be accessed and cached in the inode */
-               refcount_add(2, &node->refs);
+
+               /*
+                * It's possible that we're racing into the middle of removing
+                * this node from the radix tree.  In this case, the refcount
+                * was zero and it should never go back to one.  Just return
+                * NULL like it was never in the radix at all; our release
+                * function is in the process of removing it.
+                *
+                * Some implementations of refcount_inc refuse to bump the
+                * refcount once it has hit zero.  If we don't do this dance
+                * here, refcount_inc() may decide to just WARN_ONCE() instead
+                * of actually bumping the refcount.
+                *
+                * If this node is properly in the radix, we want to bump the
+                * refcount twice, once for the inode and once for this get
+                * operation.
+                */
+               if (refcount_inc_not_zero(&node->refs)) {
+                       refcount_inc(&node->refs);
+                       btrfs_inode->delayed_node = node;
+               } else {
+                       node = NULL;
+               }
+
                spin_unlock(&root->inode_lock);
                return node;
        }
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
        mutex_unlock(&delayed_node->mutex);
 
        if (refcount_dec_and_test(&delayed_node->refs)) {
-               bool free = false;
                struct btrfs_root *root = delayed_node->root;
+
                spin_lock(&root->inode_lock);
-               if (refcount_read(&delayed_node->refs) == 0) {
-                       radix_tree_delete(&root->delayed_nodes_tree,
-                                         delayed_node->inode_id);
-                       free = true;
-               }
+               /*
+                * Once our refcount goes to zero, nobody is allowed to bump it
+                * back up.  We can delete it now.
+                */
+               ASSERT(refcount_read(&delayed_node->refs) == 0);
+               radix_tree_delete(&root->delayed_nodes_tree,
+                                 delayed_node->inode_id);
                spin_unlock(&root->inode_lock);
-               if (free)
-                       kmem_cache_free(delayed_node_cache, delayed_node);
+               kmem_cache_free(delayed_node_cache, delayed_node);
        }
 }
 
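
The two btrfs hunks above are the two halves of one protocol: a reader that finds a node in the radix tree may only take references via refcount_inc_not_zero(), and the releaser deletes the node from the tree under the same spinlock once the count has irrevocably hit zero. Because both sides hold root->inode_lock around their tree operation, a zero refcount seen by the reader means the node is as good as gone. The protocol in miniature (generic names; the tree itself is elided):

struct node {
	refcount_t refs;
	/* ... payload and tree linkage ... */
};

/* Reader: called with the tree lock held, `node` fresh from a lookup. */
static struct node *node_get_locked(struct node *node)
{
	if (node && !refcount_inc_not_zero(&node->refs))
		return NULL;	/* releaser won the race: report "not found" */
	return node;
}

/* Releaser: the final put unlinks under the same lock, then frees. */
static void node_put(spinlock_t *lock, struct node *node)
{
	if (refcount_dec_and_test(&node->refs)) {
		spin_lock(lock);
		/* refs can never be revived now; unlink from the tree here */
		spin_unlock(lock);
		kfree(node);
	}
}
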
index 49810b70afd3941721246497d94c754ec2120619..a256842875017b386a2a349f734f5feff7391484 100644 (file)
@@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)
                kfree(dev);
                return ERR_PTR(-ENOMEM);
        }
-       bio_get(dev->flush_bio);
 
        INIT_LIST_HEAD(&dev->dev_list);
        INIT_LIST_HEAD(&dev->dev_alloc_list);
index 5688b5e1b9378107597a6117c8c3732889f951d2..7eb8d21bcab94b51905071d12a5205e7ac2ef194 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1349,9 +1349,14 @@ void setup_new_exec(struct linux_binprm * bprm)
 
        current->sas_ss_sp = current->sas_ss_size = 0;
 
-       /* Figure out dumpability. */
+       /*
+        * Figure out dumpability. Note that checking only current here is
+        * wrong, but userspace depends on it. This should be testing
+        * bprm->secureexec instead.
+        */
        if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
-           bprm->secureexec)
+           !(uid_eq(current_euid(), current_uid()) &&
+             gid_eq(current_egid(), current_gid())))
                set_dumpable(current->mm, suid_dumpable);
        else
                set_dumpable(current->mm, SUID_DUMP_USER);
index 7ff1349609e4874a35268876065490d77f5e01ab..06bd25d90ba591f45c6f772e218ffce91c915dbc 100644 (file)
@@ -517,7 +517,11 @@ retry:
        hlist_add_head(&s->s_instances, &type->fs_supers);
        spin_unlock(&sb_lock);
        get_filesystem(type);
-       register_shrinker(&s->s_shrink);
+       err = register_shrinker(&s->s_shrink);
+       if (err) {
+               deactivate_locked_super(s);
+               s = ERR_PTR(err);
+       }
        return s;
 }
 
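
register_shrinker() allocates per-node counters for NUMA-aware shrinkers and can therefore fail; this merge makes both sget_userns() here and xfs_qm_init_quotainfo() further down check it. The shape is check-then-unwind in reverse construction order, sketched with illustrative names (mycache_count/mycache_scan are assumed defined elsewhere):

struct mycache {
	struct list_lru lru;
	struct shrinker shrinker;
};

static int mycache_init(struct mycache *c)
{
	int err;

	err = list_lru_init(&c->lru);
	if (err)
		return err;

	c->shrinker.count_objects = mycache_count;
	c->shrinker.scan_objects = mycache_scan;
	c->shrinker.seeks = DEFAULT_SEEKS;

	err = register_shrinker(&c->shrinker);	/* last step: may fail */
	if (err)
		goto out_lru;
	return 0;

out_lru:
	list_lru_destroy(&c->lru);
	return err;
}
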
index ac9a4e65ca497ad3b673dc50893d13b44150c70f..41a75f9f23fdbfc2d30ae80b979a331484f0dcf9 100644 (file)
@@ -570,11 +570,14 @@ out:
 static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                              struct userfaultfd_wait_queue *ewq)
 {
+       struct userfaultfd_ctx *release_new_ctx;
+
        if (WARN_ON_ONCE(current->flags & PF_EXITING))
                goto out;
 
        ewq->ctx = ctx;
        init_waitqueue_entry(&ewq->wq, current);
+       release_new_ctx = NULL;
 
        spin_lock(&ctx->event_wqh.lock);
        /*
@@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
                                new = (struct userfaultfd_ctx *)
                                        (unsigned long)
                                        ewq->msg.arg.reserved.reserved1;
-
-                               userfaultfd_ctx_put(new);
+                               release_new_ctx = new;
                        }
                        break;
                }
@@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
        __set_current_state(TASK_RUNNING);
        spin_unlock(&ctx->event_wqh.lock);
 
+       if (release_new_ctx) {
+               struct vm_area_struct *vma;
+               struct mm_struct *mm = release_new_ctx->mm;
+
+               /* the various vma->vm_userfaultfd_ctx fields still point to it */
+               down_write(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next)
+                       if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
+                               vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
+               up_write(&mm->mmap_sem);
+
+               userfaultfd_ctx_put(release_new_ctx);
+       }
+
        /*
         * ctx may go away after this if the userfault pseudo fd is
         * already released.
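
The userfaultfd fix carries a general lesson: the final userfaultfd_ctx_put() may need mmap_sem (here, to scrub every vma->vm_userfaultfd_ctx that still points at the dying context), so it must not run under the event_wqh spinlock. The hunk records the victim while locked and releases it only after unlocking, a pattern that looks like this in miniature (illustrative names throughout):

/* Never run a sleeping or lock-taking release under a spinlock: note
 * the victim while locked, act on it after unlocking. */
static void consume_event(spinlock_t *lock, struct ctx *c)
{
	struct ctx *victim = NULL;

	spin_lock(lock);
	if (event_releases_ctx(c))	/* illustrative predicate */
		victim = c;
	spin_unlock(lock);

	if (victim)
		ctx_put(victim);	/* may take mmap_sem safely here */
}
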
index 0da80019a9173cba18811abc71e39ca4631a7449..83ed7715f856d2025509c308583436e63d0043a5 100644 (file)
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
        ASSERT(args->agbno % args->alignment == 0);
 
        /* if not file data, insert new block into the reverse map btree */
-       if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
                error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
                                       args->agbno, args->len, &args->oinfo);
                if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
        bno_cur = cnt_cur = NULL;
        mp = tp->t_mountp;
 
-       if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) {
+       if (!xfs_rmap_should_skip_owner_update(oinfo)) {
                error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
                if (error)
                        goto error0;
index 6249c92671debe20a45e3cb0577360552e36d523..a76914db72ef11094cd8d74e5bd6cd6add2a3fde 100644 (file)
@@ -212,6 +212,7 @@ xfs_attr_set(
        int                     flags)
 {
        struct xfs_mount        *mp = dp->i_mount;
+       struct xfs_buf          *leaf_bp = NULL;
        struct xfs_da_args      args;
        struct xfs_defer_ops    dfops;
        struct xfs_trans_res    tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
                 * GROT: another possible req'mt for a double-split btree op.
                 */
                xfs_defer_init(args.dfops, args.firstblock);
-               error = xfs_attr_shortform_to_leaf(&args);
+               error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
                if (error)
                        goto out_defer_cancel;
+               /*
+                * Prevent the leaf buffer from being unlocked so that a
+                * concurrent AIL push cannot grab the half-baked leaf
+                * buffer and run into problems with the write verifier.
+                */
+               xfs_trans_bhold(args.trans, leaf_bp);
+               xfs_defer_bjoin(args.dfops, leaf_bp);
                xfs_defer_ijoin(args.dfops, dp);
                error = xfs_defer_finish(&args.trans, args.dfops);
                if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
 
                /*
                 * Commit the leaf transformation.  We'll need another (linked)
-                * transaction to add the new attribute to the leaf.
+                * transaction to add the new attribute to the leaf, which
+                * means that we have to hold & join the leaf buffer here too.
                 */
-
                error = xfs_trans_roll_inode(&args.trans, dp);
                if (error)
                        goto out;
-
+               xfs_trans_bjoin(args.trans, leaf_bp);
+               leaf_bp = NULL;
        }
 
        if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
 
 out_defer_cancel:
        xfs_defer_cancel(&dfops);
-       args.trans = NULL;
 out:
+       if (leaf_bp)
+               xfs_trans_brelse(args.trans, leaf_bp);
        if (args.trans)
                xfs_trans_cancel(args.trans);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
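
The buffer-lifetime choreography above involves three cooperating calls and is worth spelling out: xfs_trans_bhold() keeps the freshly built leaf buffer locked when the current transaction commits, xfs_defer_bjoin() (added in xfs_defer.c below) re-dirties and rejoins it across every roll that xfs_defer_finish() performs, and once the rolls are done the caller hands the still-locked buffer to the final transaction with xfs_trans_bjoin(). Condensed from the hunk, with comments added:

	error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
	if (error)
		goto out_defer_cancel;
	xfs_trans_bhold(args.trans, leaf_bp);	/* stay locked past commit */
	xfs_defer_bjoin(args.dfops, leaf_bp);	/* relogged on every roll */
	xfs_defer_ijoin(args.dfops, dp);
	error = xfs_defer_finish(&args.trans, args.dfops);
	if (error)
		goto out_defer_cancel;

	error = xfs_trans_roll_inode(&args.trans, dp);
	if (error)
		goto out;
	xfs_trans_bjoin(args.trans, leaf_bp);	/* owned by the new trans now */
	leaf_bp = NULL;				/* don't brelse it on the way out */
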
index 53cc8b986eac45c4e5ec4afd319891b6e9f716c5..601eaa36f1ada22e2213f9178bcb5cdb5868034d 100644 (file)
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
 }
 
 /*
- * Convert from using the shortform to the leaf.
+ * Convert from using the shortform to the leaf.  On success, return the
+ * buffer so that we can keep it locked until we're totally done with it.
  */
 int
-xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
+xfs_attr_shortform_to_leaf(
+       struct xfs_da_args      *args,
+       struct xfs_buf          **leaf_bp)
 {
        xfs_inode_t *dp;
        xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
                sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
        }
        error = 0;
-
+       *leaf_bp = bp;
 out:
        kmem_free(tmpbuffer);
        return error;
index f7dda0c237b044b166d6d178fca3178feff2b644..894124efb421e0d0674b0f39bf63dabfe7916937 100644 (file)
@@ -48,7 +48,8 @@ void  xfs_attr_shortform_create(struct xfs_da_args *args);
 void   xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
 int    xfs_attr_shortform_lookup(struct xfs_da_args *args);
 int    xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
+int    xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
+                       struct xfs_buf **leaf_bp);
 int    xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
index 1210f684d3c28f9af8d8403c1f0222ef06dc380b..1bddbba6b80c960bdcc10c9a30210c119e1b2f77 100644 (file)
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
         * blowing out the transaction with a mix of EFIs and reflink
         * adjustments.
         */
-       if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
+       if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
                max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
        else
                max_len = len;
index 072ebfe1d6aeb3e00e306a06d71a1b478382f3ad..087fea02c3892c34e1e63df2839bd8d804ba1f3f 100644 (file)
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
 
+       /* Hold the (previously bjoin'd) buffer locked across the roll. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
+               xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
+
        trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
 
        /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
        for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
                xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
 
+       /* Rejoin the buffers and dirty them so the log moves forward. */
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+               xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+               xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+       }
+
        return error;
 }
 
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
                }
        }
 
+       ASSERT(0);
+       return -EFSCORRUPTED;
+}
+
+/*
+ * Add this buffer to the deferred op.  Each joined buffer is relogged
+ * each time we roll the transaction.
+ */
+int
+xfs_defer_bjoin(
+       struct xfs_defer_ops            *dop,
+       struct xfs_buf                  *bp)
+{
+       int                             i;
+
+       for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+               if (dop->dop_bufs[i] == bp)
+                       return 0;
+               else if (dop->dop_bufs[i] == NULL) {
+                       dop->dop_bufs[i] = bp;
+                       return 0;
+               }
+       }
+
+       ASSERT(0);
        return -EFSCORRUPTED;
 }
 
@@ -493,9 +528,7 @@ xfs_defer_init(
        struct xfs_defer_ops            *dop,
        xfs_fsblock_t                   *fbp)
 {
-       dop->dop_committed = false;
-       dop->dop_low = false;
-       memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+       memset(dop, 0, sizeof(struct xfs_defer_ops));
        *fbp = NULLFSBLOCK;
        INIT_LIST_HEAD(&dop->dop_intake);
        INIT_LIST_HEAD(&dop->dop_pending);
index d4f046dd44bd4ae434d8104991d0327f2d2b9fc7..045beacdd37d81c9e01e0ab46ab2bbcbbb581fbb 100644 (file)
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
 };
 
 #define XFS_DEFER_OPS_NR_INODES        2       /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS  2       /* join up to two buffers */
 
 struct xfs_defer_ops {
        bool                    dop_committed;  /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
        struct list_head        dop_intake;     /* unlogged pending work */
        struct list_head        dop_pending;    /* logged pending work */
 
-       /* relog these inodes with each roll */
+       /* relog these with each roll */
        struct xfs_inode        *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+       struct xfs_buf          *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
 };
 
 void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
 void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
 bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
 int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
 
 /* Description of a deferred type. */
 struct xfs_defer_op_type {
index 89bf16b4d9377293fa842c48f2b9b637f83c62c2..b0f31791c7e6137c0b7e46c35000e3c76e145ba1 100644 (file)
@@ -632,8 +632,6 @@ xfs_iext_insert(
        struct xfs_iext_leaf    *new = NULL;
        int                     nr_entries, i;
 
-       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
-
        if (ifp->if_height == 0)
                xfs_iext_alloc_root(ifp, cur);
        else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
        xfs_iext_set(cur_rec(cur), irec);
        ifp->if_bytes += sizeof(struct xfs_iext_rec);
 
+       trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
+
        if (new)
                xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
 }
index 585b35d34142157863740d3b8cd437b37e49e05d..c40d26763075307b064d49bd3cb48dfce8dd5b67 100644 (file)
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Add refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
-       if (error)
-               return error;
-
-       /* Add rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
        xfs_extlen_t            aglen,
        struct xfs_defer_ops    *dfops)
 {
-       int                     error;
-
        trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
                        agbno, aglen);
 
        /* Remove refcount btree reservation */
-       error = xfs_refcount_adjust_cow(rcur, agbno, aglen,
+       return xfs_refcount_adjust_cow(rcur, agbno, aglen,
                        XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
-       if (error)
-               return error;
-
-       /* Remove rmap entry */
-       if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
-               error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
-                               rcur->bc_private.a.agno,
-                               agbno, aglen, XFS_RMAP_OWN_COW);
-               if (error)
-                       return error;
-       }
-
-       return error;
 }
 
 /* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
-       return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
+       error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
                        fsb, len);
+       if (error)
+               return error;
+
+       /* Add rmap entry */
+       return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
 }
 
 /* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
        xfs_fsblock_t                   fsb,
        xfs_extlen_t                    len)
 {
+       int                             error;
+
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return 0;
 
+       /* Remove rmap entry */
+       error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
+                       XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+       if (error)
+               return error;
+
        return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
                        fsb, len);
 }
index dd019cee1b3bdccf4e08b25fd70b2c59220859a1..50db920ceeebbf077c2b3b13690066173ba7cf4e 100644 (file)
@@ -367,6 +367,51 @@ xfs_rmap_lookup_le_range(
        return error;
 }
 
+/*
+ * Perform all the relevant owner checks for a removal op.  If we're doing an
+ * unknown-owner removal then we have no owner information to check.
+ */
+static int
+xfs_rmap_free_check_owner(
+       struct xfs_mount        *mp,
+       uint64_t                ltoff,
+       struct xfs_rmap_irec    *rec,
+       xfs_fsblock_t           bno,
+       xfs_filblks_t           len,
+       uint64_t                owner,
+       uint64_t                offset,
+       unsigned int            flags)
+{
+       int                     error = 0;
+
+       if (owner == XFS_RMAP_OWN_UNKNOWN)
+               return 0;
+
+       /* Make sure the unwritten flag matches. */
+       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
+                       (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+
+       /* Make sure the owner matches what we expect to find in the tree. */
+       XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+
+       /* Check the offset, if necessary. */
+       if (XFS_RMAP_NON_INODE_OWNER(owner))
+               goto out;
+
+       if (flags & XFS_RMAP_BMBT_BLOCK) {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
+                               out);
+       } else {
+               XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                               ltoff + rec->rm_blockcount >= offset + len,
+                               out);
+       }
+
+out:
+       return error;
+}
+
 /*
  * Find the extent in the rmap btree and remove it.
  *
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
                goto out_done;
        }
 
-       /* Make sure the unwritten flag matches. */
-       XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
-                       (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+       /*
+        * If we're doing an unknown-owner removal for EFI recovery, we expect
+        * to find the full range in the rmapbt or nothing at all.  If we
+        * don't find any rmaps overlapping either end of the range, we're
+        * done.  Hopefully this means that the EFI creator already queued
+        * (and finished) a RUI to remove the rmap.
+        */
+       if (owner == XFS_RMAP_OWN_UNKNOWN &&
+           ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
+               struct xfs_rmap_irec    rtrec;
+
+               error = xfs_btree_increment(cur, 0, &i);
+               if (error)
+                       goto out_error;
+               if (i == 0)
+                       goto out_done;
+               error = xfs_rmap_get_rec(cur, &rtrec, &i);
+               if (error)
+                       goto out_error;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+               if (rtrec.rm_startblock >= bno + len)
+                       goto out_done;
+       }
 
        /* Make sure the extent we found covers the entire freeing range. */
        XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
-               ltrec.rm_startblock + ltrec.rm_blockcount >=
-               bno + len, out_error);
+                       ltrec.rm_startblock + ltrec.rm_blockcount >=
+                       bno + len, out_error);
 
-       /* Make sure the owner matches what we expect to find in the tree. */
-       XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner ||
-                                   XFS_RMAP_NON_INODE_OWNER(owner), out_error);
-
-       /* Check the offset, if necessary. */
-       if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
-               if (flags & XFS_RMAP_BMBT_BLOCK) {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
-                                       out_error);
-               } else {
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltrec.rm_offset <= offset, out_error);
-                       XFS_WANT_CORRUPTED_GOTO(mp,
-                                       ltoff + ltrec.rm_blockcount >= offset + len,
-                                       out_error);
-               }
-       }
+       /* Check owner information. */
+       error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
+                       offset, flags);
+       if (error)
+               goto out_error;
 
        if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
                /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
                flags |= XFS_RMAP_UNWRITTEN;
        trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
                        unwritten, oinfo);
+       ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
 
        /*
         * For the initial lookup, look for an exact match or the left-adjacent
index 466ede637080e5832046a96d62789eb2e46ed03f..0fcd5b1ba7295379081e0c61d230324447a8ae56 100644 (file)
@@ -61,7 +61,21 @@ static inline void
 xfs_rmap_skip_owner_update(
        struct xfs_owner_info   *oi)
 {
-       oi->oi_owner = XFS_RMAP_OWN_UNKNOWN;
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
+}
+
+static inline bool
+xfs_rmap_should_skip_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       return oi->oi_owner == XFS_RMAP_OWN_NULL;
+}
+
+static inline void
+xfs_rmap_any_owner_update(
+       struct xfs_owner_info   *oi)
+{
+       xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
 }
 
 /* Reverse mapping functions. */
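
After this header change the two owner sentinels mean different things: XFS_RMAP_OWN_NULL says "this space has no rmap record at all, skip rmapbt updates", while XFS_RMAP_OWN_UNKNOWN says "update the rmapbt, matching whatever owner is recorded", which is what EFI recovery now requests via xfs_rmap_any_owner_update(). Usage at a glance (xfs-internal helpers from this hunk):

	struct xfs_owner_info	oinfo;

	xfs_rmap_skip_owner_update(&oinfo);	/* OWN_NULL: no rmapbt update */
	xfs_rmap_any_owner_update(&oinfo);	/* OWN_UNKNOWN: update, any owner */

	if (!xfs_rmap_should_skip_owner_update(&oinfo)) {
		/* do the xfs_rmap_free()/xfs_rmap_alloc() call, as in
		 * xfs_alloc_ag_vextent() and xfs_free_ag_extent() above */
	}
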
index 21e2d70884e18edc2c765584f201a8b04604837c..4fc526a27a94fe4594508b707f0a9ca976c1dd98 100644 (file)
@@ -399,7 +399,7 @@ xfs_map_blocks(
               (ip->i_df.if_flags & XFS_IFEXTENTS));
        ASSERT(offset <= mp->m_super->s_maxbytes);
 
-       if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes)
+       if (offset > mp->m_super->s_maxbytes - count)
                count = mp->m_super->s_maxbytes - offset;
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
        lockmode = xfs_ilock_data_map_shared(ip);
 
        ASSERT(offset <= mp->m_super->s_maxbytes);
-       if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes)
+       if (offset > mp->m_super->s_maxbytes - size)
                size = mp->m_super->s_maxbytes - offset;
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
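
This pair of hunks, and the matching one in xfs_iomap.c below, rewrites "offset + count > s_maxbytes" as "offset > s_maxbytes - count": the sum can wrap past the top of the type for huge offsets, silently passing the check and corrupting the clamp that follows, while the subtraction form stays in range for the signed, already-asserted quantities at these call sites. When the operands are unsigned and not otherwise bounded, the fully general guard needs one extra clause (plain C, illustrative name):

#include <stdbool.h>
#include <stdint.h>

/* True when [off, off + len) extends past max, without ever computing
 * the possibly-wrapping sum off + len. */
static bool range_exceeds(uint64_t off, uint64_t len, uint64_t max)
{
	return len > max || off > max - len;
}

The kernel sites can drop the len-vs-max clause because the ASSERT(offset <= s_maxbytes) above each check, plus the bounded I/O sizes involved, already cover it; the clamp count = s_maxbytes - offset then stays exact.
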
index 44f8c54512102577dbe768e5f137298ba1ee79c7..64da90655e957c3fd01331720aa32093909ddad6 100644 (file)
@@ -538,7 +538,7 @@ xfs_efi_recover(
                return error;
        efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
 
-       xfs_rmap_skip_owner_update(&oinfo);
+       xfs_rmap_any_owner_update(&oinfo);
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &efip->efi_format.efi_extents[i];
                error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
index 8f22fc579dbba4abf9b609040802d24e9cf2f732..60a2e128cb6a59aa7181faa5c519d511a308bc5c 100644 (file)
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
                 * this doesn't actually exist in the rmap btree.
                 */
                xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
+               error = xfs_rmap_free(tp, bp, agno,
+                               be32_to_cpu(agf->agf_length) - new,
+                               new, &oinfo);
+               if (error)
+                       goto error0;
                error = xfs_free_extent(tp,
                                XFS_AGB_TO_FSB(mp, agno,
                                        be32_to_cpu(agf->agf_length) - new),
index 43005fbe8b1eefabc84ee762a9427ec784889814..3861d61fb265f66a9d39723286d5adc9772cc15e 100644 (file)
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
  * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
  * (We'll just piggyback on the post-EOF prealloc space workqueue.)
  */
-STATIC void
+void
 xfs_queue_cowblocks(
        struct xfs_mount *mp)
 {
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
        return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
 }
 
+static inline unsigned long
+xfs_iflag_for_tag(
+       int             tag)
+{
+       switch (tag) {
+       case XFS_ICI_EOFBLOCKS_TAG:
+               return XFS_IEOFBLOCKS;
+       case XFS_ICI_COWBLOCKS_TAG:
+               return XFS_ICOWBLOCKS;
+       default:
+               ASSERT(0);
+               return 0;
+       }
+}
+
 static void
-__xfs_inode_set_eofblocks_tag(
+__xfs_inode_set_blocks_tag(
        xfs_inode_t     *ip,
        void            (*execute)(struct xfs_mount *mp),
        void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
         * Don't bother locking the AG and looking up in the radix trees
         * if we already know that we have the tag set.
         */
-       if (ip->i_flags & XFS_IEOFBLOCKS)
+       if (ip->i_flags & xfs_iflag_for_tag(tag))
                return;
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags |= XFS_IEOFBLOCKS;
+       ip->i_flags |= xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_eofblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
                        trace_xfs_perag_set_eofblocks,
                        XFS_ICI_EOFBLOCKS_TAG);
 }
 
 static void
-__xfs_inode_clear_eofblocks_tag(
+__xfs_inode_clear_blocks_tag(
        xfs_inode_t     *ip,
        void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
                                    int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
        struct xfs_perag *pag;
 
        spin_lock(&ip->i_flags_lock);
-       ip->i_flags &= ~XFS_IEOFBLOCKS;
+       ip->i_flags &= ~xfs_iflag_for_tag(tag);
        spin_unlock(&ip->i_flags_lock);
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_eofblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
 }
 
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_set_cowblocks_tag(ip);
-       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+       return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
                        trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
        trace_xfs_inode_clear_cowblocks_tag(ip);
-       return __xfs_inode_clear_eofblocks_tag(ip,
+       return __xfs_inode_clear_blocks_tag(ip,
                        trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
index bff4d85e54984ad84ca0741f801f536a99195055..d4a77588eca15b90639debc1ca6c5c62a3680de8 100644 (file)
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
 int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
 int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
 void xfs_cowblocks_worker(struct work_struct *);
+void xfs_queue_cowblocks(struct xfs_mount *);
 
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
        int (*execute)(struct xfs_inode *ip, int flags, void *args),
index b41952a4ddd851fe63a475be4f2b6bc8d7e47beb..6f95bdb408ced01b9471b931714d279003a22d92 100644 (file)
@@ -1487,6 +1487,24 @@ xfs_link(
        return error;
 }
 
+/* Clear the reflink flag and the cowblocks tag if possible. */
+static void
+xfs_itruncate_clear_reflink_flags(
+       struct xfs_inode        *ip)
+{
+       struct xfs_ifork        *dfork;
+       struct xfs_ifork        *cfork;
+
+       if (!xfs_is_reflink_inode(ip))
+               return;
+       dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
+               ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+       if (cfork->if_bytes == 0)
+               xfs_inode_clear_cowblocks_tag(ip);
+}
+
 /*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
@@ -1583,15 +1601,7 @@ xfs_itruncate_extents(
        if (error)
                goto out;
 
-       /*
-        * Clear the reflink flag if there are no data fork blocks and
-        * there are no extents staged in the cow fork.
-        */
-       if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
-               if (ip->i_d.di_nblocks == 0)
-                       ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
-               xfs_inode_clear_cowblocks_tag(ip);
-       }
+       xfs_itruncate_clear_reflink_flags(ip);
 
        /*
         * Always re-log the inode so that our permanent transaction can keep
index b2136af9289f3d854f88a549ec32efa455cfc78b..d383e392ec9ddcca6f552dc8c4cfe5329373d6c9 100644 (file)
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * log recovery to replay a bmap operation on the inode.
  */
 #define XFS_IRECOVERY          (1 << 11)
+#define XFS_ICOWBLOCKS         (1 << 12) /* has the cowblocks tag set */
 
 /*
  * Per-lifetime flags need to be reset when re-using a reclaimable inode during
index 7ab52a8bc0a9e6dff904fe10b097eb3a478db9b9..66e1edbfb2b2bcd7d278226a33f0531bb0043b97 100644 (file)
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
        }
 
        ASSERT(offset <= mp->m_super->s_maxbytes);
-       if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+       if (offset > mp->m_super->s_maxbytes - length)
                length = mp->m_super->s_maxbytes - offset;
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        end_fsb = XFS_B_TO_FSB(mp, offset + length);
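
The rewritten clamp avoids integer overflow: given the preceding ASSERT that offset <= s_maxbytes, testing "offset > max - length" is equivalent to "offset + length > max" but cannot wrap. The idiom as a standalone sketch (names hypothetical):

	/* unsafe: offset + length may overflow the type before the compare */
	if (offset + length > max)
		length = max - offset;

	/* safe, assuming offset <= max and length <= max already hold */
	if (offset > max - length)
		length = max - offset;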
index ec952dfad359f6ad08d33d234f0cd75200c5933b..b897b11afb2c658bebba0416739fddcc0fec5aa4 100644 (file)
@@ -48,7 +48,7 @@
 STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
 
-
+STATIC void    xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
 STATIC void    xfs_qm_dqfree_one(struct xfs_dquot *dqp);
 /*
  * We use the batch lookup interface to iterate over the dquots as it
@@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(
        qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
        qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
        qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
-       register_shrinker(&qinf->qi_shrinker);
+
+       error = register_shrinker(&qinf->qi_shrinker);
+       if (error)
+               goto out_free_inos;
+
        return 0;
 
+out_free_inos:
+       mutex_destroy(&qinf->qi_quotaofflock);
+       mutex_destroy(&qinf->qi_tree_lock);
+       xfs_qm_destroy_quotainos(qinf);
 out_free_lru:
        list_lru_destroy(&qinf->qi_lru);
 out_free_qinf:
@@ -706,7 +714,6 @@ out_free_qinf:
        return error;
 }
 
-
 /*
  * Gets called when unmounting a filesystem or when all quotas get
  * turned off.
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(
 
        unregister_shrinker(&qi->qi_shrinker);
        list_lru_destroy(&qi->qi_lru);
-
-       if (qi->qi_uquotaip) {
-               IRELE(qi->qi_uquotaip);
-               qi->qi_uquotaip = NULL; /* paranoia */
-       }
-       if (qi->qi_gquotaip) {
-               IRELE(qi->qi_gquotaip);
-               qi->qi_gquotaip = NULL;
-       }
-       if (qi->qi_pquotaip) {
-               IRELE(qi->qi_pquotaip);
-               qi->qi_pquotaip = NULL;
-       }
+       xfs_qm_destroy_quotainos(qi);
+       mutex_destroy(&qi->qi_tree_lock);
        mutex_destroy(&qi->qi_quotaofflock);
        kmem_free(qi);
        mp->m_quotainfo = NULL;
@@ -1599,6 +1595,24 @@ error_rele:
        return error;
 }
 
+STATIC void
+xfs_qm_destroy_quotainos(
+       xfs_quotainfo_t *qi)
+{
+       if (qi->qi_uquotaip) {
+               IRELE(qi->qi_uquotaip);
+               qi->qi_uquotaip = NULL; /* paranoia */
+       }
+       if (qi->qi_gquotaip) {
+               IRELE(qi->qi_gquotaip);
+               qi->qi_gquotaip = NULL;
+       }
+       if (qi->qi_pquotaip) {
+               IRELE(qi->qi_pquotaip);
+               qi->qi_pquotaip = NULL;
+       }
+}
+
 STATIC void
 xfs_qm_dqfree_one(
        struct xfs_dquot        *dqp)
index cf7c8f81bebb566a486f0f732a0fe6a1040022c4..47aea2e82c268f4bbf9c25c1c1c6f3821c11caa3 100644 (file)
@@ -454,6 +454,8 @@ retry:
        if (error)
                goto out_bmap_cancel;
 
+       xfs_inode_set_cowblocks_tag(ip);
+
        /* Finish up. */
        error = xfs_defer_finish(&tp, &dfops);
        if (error)
@@ -490,8 +492,9 @@ xfs_reflink_find_cow_mapping(
        struct xfs_iext_cursor          icur;
 
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
-       ASSERT(xfs_is_reflink_inode(ip));
 
+       if (!xfs_is_reflink_inode(ip))
+               return false;
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
                return false;
@@ -610,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
 
                        /* Remove the mapping from the CoW fork. */
                        xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
+               } else {
+                       /* Didn't do anything, push cursor back. */
+                       xfs_iext_prev(ifp, &icur);
                }
 next_extent:
                if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -725,7 +731,7 @@ xfs_reflink_end_cow(
                        (unsigned int)(end_fsb - offset_fsb),
                        XFS_DATA_FORK);
        error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
-                       resblks, 0, 0, &tp);
+                       resblks, 0, XFS_TRANS_RESERVE, &tp);
        if (error)
                goto out;
 
@@ -1291,6 +1297,17 @@ xfs_reflink_remap_range(
 
        trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
+       /*
+        * Clear out post-eof preallocations because we don't have page cache
+        * backing the delayed allocations and they'll never get freed on
+        * their own.
+        */
+       if (xfs_can_free_eofblocks(dest, true)) {
+               ret = xfs_free_eofblocks(dest);
+               if (ret)
+                       goto out_unlock;
+       }
+
        /* Set flags and remap blocks. */
        ret = xfs_reflink_set_inode_flag(src, dest);
        if (ret)
index 5122d3021117f00e20d6dd1e195c28666cc71076..1dacccc367f81725a678ea3a6ed50528a731920d 100644 (file)
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
                        xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                        return error;
                }
+               xfs_queue_cowblocks(mp);
 
                /* Create the per-AG metadata reservation pool. */
                error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
 
        /* rw -> ro */
        if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
+               /* Get rid of any leftover CoW reservations... */
+               cancel_delayed_work_sync(&mp->m_cowblocks_work);
+               error = xfs_icache_free_cowblocks(mp, NULL);
+               if (error) {
+                       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+                       return error;
+               }
+
                /* Free the per-AG metadata reservation pool. */
                error = xfs_fs_unreserve_ag_blocks(mp);
                if (error) {
index ea189d88a3cc761e0239043785e00d9c6d8402f5..8ac4e68a12f08e4e00c1fcf850f0f8c97188d390 100644 (file)
@@ -7,9 +7,10 @@
 #ifndef _ASM_GENERIC_MM_HOOKS_H
 #define _ASM_GENERIC_MM_HOOKS_H
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
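
Now that arch_dup_mmap() returns int, the architecture hook can fail and its caller in the fork path has to propagate the error. A hypothetical call-site shape (the caller is not part of this hunk):

	/* presumably in dup_mmap(): */
	retval = arch_dup_mmap(oldmm, mm);
	if (retval)
		goto out;	/* unwind and fail the fork */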
index b234d54f2cb6e4c23a21db2af3b225264eccae2a..868e68561f913ecaec80ddf05f02816767ea5a17 100644 (file)
@@ -1025,6 +1025,11 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 struct file;
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
                        unsigned long size, pgprot_t *vma_prot);
+
+#ifndef CONFIG_X86_ESPFIX64
+static inline void init_espfix_bsp(void) { }
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef io_remap_pfn_range
index 38d9c5861ed8c110d5dbac1b6a807252b520f23e..f38227a78eae9f6b2d99ca9d1ca7dbb6ef720ce1 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/if_alg.h>
 #include <linux/scatterlist.h>
 #include <linux/types.h>
+#include <linux/atomic.h>
 #include <net/sock.h>
 
 #include <crypto/aead.h>
@@ -150,7 +151,7 @@ struct af_alg_ctx {
        struct crypto_wait wait;
 
        size_t used;
-       size_t rcvused;
+       atomic_t rcvused;
 
        bool more;
        bool merge;
@@ -215,7 +216,7 @@ static inline int af_alg_rcvbuf(struct sock *sk)
        struct af_alg_ctx *ctx = ask->private;
 
        return max_t(int, max_t(int, sk->sk_rcvbuf & PAGE_MASK, PAGE_SIZE) -
-                         ctx->rcvused, 0);
+                    atomic_read(&ctx->rcvused), 0);
 }
 
 /**
index cceafa01f9073293cf4e813247b54bbe094728e3..b67404fc4b34bab495086b4b1e969f53a0921b39 100644 (file)
@@ -27,6 +27,7 @@ static inline struct mcryptd_ahash *__mcryptd_ahash_cast(
 
 struct mcryptd_cpu_queue {
        struct crypto_queue queue;
+       spinlock_t q_lock;
        struct work_struct work;
 };
 
index 6e45608b2399813329e2280e601c2465940def53..9da6ce22803f03fc318a7fdd33af380eae67d4e6 100644 (file)
@@ -62,7 +62,7 @@ struct arch_timer_cpu {
        bool                    enabled;
 };
 
-int kvm_timer_hyp_init(void);
+int kvm_timer_hyp_init(bool);
 int kvm_timer_enable(struct kvm_vcpu *vcpu);
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
index 82f0c8fd7be8fd20951af806319f64f615f4ffc6..23d29b39f71e83e8a6a25540adc2e3f28702aec7 100644 (file)
@@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
 
 #define bio_set_dev(bio, bdev)                         \
 do {                                           \
+       if ((bio)->bi_disk != (bdev)->bd_disk)  \
+               bio_clear_flag(bio, BIO_THROTTLED);\
        (bio)->bi_disk = (bdev)->bd_disk;       \
        (bio)->bi_partno = (bdev)->bd_partno;   \
 } while (0)
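
Clearing BIO_THROTTLED whenever a bio is redirected to a different disk means blk-throttle charges the bio again on the new queue instead of letting it bypass the limits there. Illustrative (hypothetical) remap sequence:

	/* bio already throttled on the upper device ... */
	bio_set_dev(bio, lower_bdev);	/* bd_disk differs: flag cleared */
	/* ... so the lower queue's throttling limits apply as well */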
index a1e628e032dad75bf1837a25e45b55a7f54ca2df..9e7d8bd776d227d2ba92b137af7230300f5b1d4a 100644 (file)
@@ -50,8 +50,6 @@ struct blk_issue_stat {
 struct bio {
        struct bio              *bi_next;       /* request queue link */
        struct gendisk          *bi_disk;
-       u8                      bi_partno;
-       blk_status_t            bi_status;
        unsigned int            bi_opf;         /* bottom bits req flags,
                                                 * top bits REQ_OP. Use
                                                 * accessors.
@@ -59,8 +57,8 @@ struct bio {
        unsigned short          bi_flags;       /* status, etc and bvec pool number */
        unsigned short          bi_ioprio;
        unsigned short          bi_write_hint;
-
-       struct bvec_iter        bi_iter;
+       blk_status_t            bi_status;
+       u8                      bi_partno;
 
        /* Number of segments in this BIO after
         * physical address coalescing is performed.
@@ -74,8 +72,9 @@ struct bio {
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;
 
-       atomic_t                __bi_remaining;
+       struct bvec_iter        bi_iter;
 
+       atomic_t                __bi_remaining;
        bio_end_io_t            *bi_end_io;
 
        void                    *bi_private;
index 8089ca17db9ac65998ec9cf82f65743bb5c5abb9..0ce8a372d5069a7aca7810429a968d20e923d3d1 100644 (file)
@@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t;
 struct request {
        struct list_head queuelist;
        union {
-               call_single_data_t csd;
+               struct __call_single_data csd;
                u64 fifo_time;
        };
 
@@ -241,14 +241,24 @@ struct request {
        struct request *next_rq;
 };
 
+static inline bool blk_op_is_scsi(unsigned int op)
+{
+       return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT;
+}
+
+static inline bool blk_op_is_private(unsigned int op)
+{
+       return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
+}
+
 static inline bool blk_rq_is_scsi(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT;
+       return blk_op_is_scsi(req_op(rq));
 }
 
 static inline bool blk_rq_is_private(struct request *rq)
 {
-       return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT;
+       return blk_op_is_private(req_op(rq));
 }
 
 static inline bool blk_rq_is_passthrough(struct request *rq)
@@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
        return blk_rq_is_scsi(rq) || blk_rq_is_private(rq);
 }
 
+static inline bool bio_is_passthrough(struct bio *bio)
+{
+       unsigned op = bio_op(bio);
+
+       return blk_op_is_scsi(op) || blk_op_is_private(op);
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
        return req->ioprio;
@@ -948,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
-extern int blk_rq_append_bio(struct request *rq, struct bio *bio);
+extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);
 extern void blk_recount_segments(struct request_queue *, struct bio *);
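
blk_rq_append_bio() now takes struct bio ** so that it can hand a replacement bio (such as a bounce bio) back to its caller. Hypothetical updated call site:

	ret = blk_rq_append_bio(rq, &bio);	/* bio may be swapped out */
	if (ret)
		return ret;
	/* continue with the possibly-new bio pointer */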
index e55e4255a21082325f0888ffcd464615add708b8..0b25cf87b6d608b6a1646e89e1408270f148a783 100644 (file)
@@ -43,7 +43,14 @@ struct bpf_map_ops {
 };
 
 struct bpf_map {
-       atomic_t refcnt;
+       /* 1st cacheline with read-mostly members of which some
+        * are also accessed in fast-path (e.g. ops, max_entries).
+        */
+       const struct bpf_map_ops *ops ____cacheline_aligned;
+       struct bpf_map *inner_map_meta;
+#ifdef CONFIG_SECURITY
+       void *security;
+#endif
        enum bpf_map_type map_type;
        u32 key_size;
        u32 value_size;
@@ -52,15 +59,17 @@ struct bpf_map {
        u32 pages;
        u32 id;
        int numa_node;
-       struct user_struct *user;
-       const struct bpf_map_ops *ops;
-       struct work_struct work;
+       bool unpriv_array;
+       /* 7 bytes hole */
+
+       /* 2nd cacheline with misc members to avoid false sharing
+        * particularly with refcounting.
+        */
+       struct user_struct *user ____cacheline_aligned;
+       atomic_t refcnt;
        atomic_t usercnt;
-       struct bpf_map *inner_map_meta;
+       struct work_struct work;
        char name[BPF_OBJ_NAME_LEN];
-#ifdef CONFIG_SECURITY
-       void *security;
-#endif
 };
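
The field reordering splits struct bpf_map into a read-mostly first cacheline and a write-heavy second one, as the new comments state. The intended layout, illustratively (offsets are assumptions, not pahole output):

	/* cacheline 0: ops, inner_map_meta, map_type, sizes, index params
	 *              - read on every lookup, stays clean and shared
	 * cacheline 1: user, refcnt, usercnt, work
	 *              - written on map get/put, so refcount traffic no
	 *                longer invalidates the hot lookup fields
	 */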
 
 /* function argument constraints */
@@ -221,6 +230,7 @@ struct bpf_prog_aux {
 struct bpf_array {
        struct bpf_map map;
        u32 elem_size;
+       u32 index_mask;
        /* 'ownership' of prog_array is claimed by the first program that
         * is going to use this map or by the first program which FD is stored
         * in the map to make sure that all callers and callees have the same
@@ -419,6 +429,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
                attr->numa_node : NUMA_NO_NODE;
 }
 
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -506,6 +518,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 {
        return 0;
 }
+
+static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
+                               enum bpf_prog_type type)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -514,6 +532,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
        return bpf_prog_get_type_dev(ufd, type, false);
 }
 
+bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
+
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
 
index c561b986bab0ebf886000ea34e377ea789138a3b..1632bb13ad8aed8cfeba2ccc69cfa458d02540bb 100644 (file)
  * In practice this is far bigger than any realistic pointer offset; this limit
  * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
  */
-#define BPF_MAX_VAR_OFF        (1ULL << 31)
+#define BPF_MAX_VAR_OFF        (1 << 29)
 /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
  * that converting umax_value to int cannot overflow.
  */
-#define BPF_MAX_VAR_SIZ        INT_MAX
+#define BPF_MAX_VAR_SIZ        (1 << 29)
 
 /* Liveness marks, used for registers and spilled-regs (in stack slots).
  * Read marks propagate upwards until they find a write mark; they record that
index 94a59ba7d422f4d3b4a53314d99fa4a1367fbcc4..519e94915d1850628ba3a79f7341c29ce3a68f40 100644 (file)
@@ -32,7 +32,6 @@ struct completion {
 #define init_completion(x) __init_completion(x)
 static inline void complete_acquire(struct completion *x) {}
 static inline void complete_release(struct completion *x) {}
-static inline void complete_release_commit(struct completion *x) {}
 
 #define COMPLETION_INITIALIZER(work) \
        { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
index a04ef7c15c6a9dd504a288e23d7da3d6322459e2..7b01bc11c6929b7aaf4906a1587ee1b4e19b2e70 100644 (file)
@@ -47,6 +47,13 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr);
 extern int cpu_add_dev_attr_group(struct attribute_group *attrs);
 extern void cpu_remove_dev_attr_group(struct attribute_group *attrs);
 
+extern ssize_t cpu_show_meltdown(struct device *dev,
+                                struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v1(struct device *dev,
+                                  struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spectre_v2(struct device *dev,
+                                  struct device_attribute *attr, char *buf);
+
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
                                 const struct attribute_group **groups,
index 201ab72679863163c40f664cacca77b09dfaaef5..1a32e558eb1175e812234dfaa18d5e4665b46f30 100644 (file)
@@ -86,7 +86,7 @@ enum cpuhp_state {
        CPUHP_MM_ZSWP_POOL_PREPARE,
        CPUHP_KVM_PPC_BOOK3S_PREPARE,
        CPUHP_ZCOMP_PREPARE,
-       CPUHP_TIMERS_DEAD,
+       CPUHP_TIMERS_PREPARE,
        CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BP_PREPARE_DYN,
        CPUHP_BP_PREPARE_DYN_END                = CPUHP_BP_PREPARE_DYN + 20,
index 06097ef304491dc8f9743154c10f92d20165cb47..b511f6d24b42b0c4a2796bc33f494d86b7c7854b 100644 (file)
@@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
        vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
 #define VMCOREINFO_SYMBOL(name) \
        vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ARRAY(name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name)
 #define VMCOREINFO_SIZE(name) \
        vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
                              (unsigned long)sizeof(name))
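
VMCOREINFO_SYMBOL_ARRAY() drops the address-of operator: for an array symbol, the name itself decays to the address of the first element, which is the value vmcoreinfo consumers want recorded. Hypothetical use:

	/* record the load address of an array symbol, e.g.: */
	VMCOREINFO_SYMBOL_ARRAY(mem_section);	/* assumed first user */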
index d813f7b04da7a1dbe35fc7dac5bb2842287c8021..29fdf8029cf6fea785631ce65c99eb16c80bf926 100644 (file)
@@ -140,11 +140,13 @@ struct efi_boot_memmap {
 
 struct capsule_info {
        efi_capsule_header_t    header;
+       efi_capsule_header_t    *capsule;
        int                     reset_type;
        long                    index;
        size_t                  count;
        size_t                  total_size;
-       phys_addr_t             *pages;
+       struct page             **pages;
+       phys_addr_t             *phys;
        size_t                  page_bytes_remain;
 };
 
index f4ff47d4a893a7bd0ef7f4baeef6c29dbee2a9f2..fe0c349684fa83428a31814d38836a6a2a7000da 100644 (file)
@@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
 {
        if (fscache_cookie_valid(cookie) && PageFsCache(page))
                return __fscache_maybe_release_page(cookie, page, gfp);
-       return false;
+       return true;
 }
 
 /**
index 55e672592fa93cd94aef5dfab761b688df0a7235..7258cd676df42c3ea77700d1a6ed15e7957e4f37 100644 (file)
@@ -66,9 +66,10 @@ struct gpio_irq_chip {
        /**
         * @lock_key:
         *
-        * Per GPIO IRQ chip lockdep class.
+        * Per GPIO IRQ chip lockdep classes.
         */
        struct lock_class_key *lock_key;
+       struct lock_class_key *request_key;
 
        /**
         * @parent_handler:
@@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip,
 
 /* add/remove chips */
 extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
-                                     struct lock_class_key *lock_key);
+                                     struct lock_class_key *lock_key,
+                                     struct lock_class_key *request_key);
 
 /**
  * gpiochip_add_data() - register a gpio_chip
@@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
  */
 #ifdef CONFIG_LOCKDEP
 #define gpiochip_add_data(chip, data) ({               \
-               static struct lock_class_key key;       \
-               gpiochip_add_data_with_key(chip, data, &key);   \
+               static struct lock_class_key lock_key;  \
+               static struct lock_class_key request_key;         \
+               gpiochip_add_data_with_key(chip, data, &lock_key, \
+                                          &request_key);         \
        })
 #else
-#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL)
+#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL)
 #endif
 
 static inline int gpiochip_add(struct gpio_chip *chip)
@@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip,
                             irq_flow_handler_t handler,
                             unsigned int type,
                             bool threaded,
-                            struct lock_class_key *lock_key);
+                            struct lock_class_key *lock_key,
+                            struct lock_class_key *request_key);
 
 #ifdef CONFIG_LOCKDEP
 
@@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
                                       irq_flow_handler_t handler,
                                       unsigned int type)
 {
-       static struct lock_class_key key;
+       static struct lock_class_key lock_key;
+       static struct lock_class_key request_key;
 
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, false, &key);
+                                       handler, type, false,
+                                       &lock_key, &request_key);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
                          unsigned int type)
 {
 
-       static struct lock_class_key key;
+       static struct lock_class_key lock_key;
+       static struct lock_class_key request_key;
 
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, true, &key);
+                                       handler, type, true,
+                                       &lock_key, &request_key);
 }
 #else
 static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
@@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip,
                                       unsigned int type)
 {
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, false, NULL);
+                                       handler, type, false, NULL, NULL);
 }
 
 static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
@@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
                          unsigned int type)
 {
        return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq,
-                                       handler, type, true, NULL);
+                                       handler, type, true, NULL, NULL);
 }
 #endif /* CONFIG_LOCKDEP */
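
Every expansion of these macros now defines two static lockdep keys, giving each call site distinct classes for both the descriptor lock and the request mutex. Driver-side usage is unchanged; hypothetically:

	/* one static lock_key/request_key pair per expansion site */
	ret = gpiochip_add_data(&chip->gc, chip);	/* hypothetical driver */
	if (ret)
		return ret;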
 
index f3274d9f46a252a9abc332dc0621ec3fc02ef4aa..8dad3dd26eaeddc60aea0ce70547a2964364befd 100644 (file)
@@ -83,7 +83,9 @@
 /*
  * Decoding Capability Register
  */
+#define cap_5lp_support(c)     (((c) >> 60) & 1)
 #define cap_pi_support(c)      (((c) >> 59) & 1)
+#define cap_fl1gp_support(c)   (((c) >> 56) & 1)
 #define cap_read_drain(c)      (((c) >> 55) & 1)
 #define cap_write_drain(c)     (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
index cb18c6290ca87290996e636f3eda14eb03d26316..8415bf1a9776245b810c8f92fa16c98eb038a45f 100644 (file)
@@ -273,7 +273,8 @@ struct ipv6_pinfo {
                                                 * 100: prefer care-of address
                                                 */
                                dontfrag:1,
-                               autoflowlabel:1;
+                               autoflowlabel:1,
+                               autoflowlabel_set:1;
        __u8                    min_hopcount;
        __u8                    tclass;
        __be32                  rcv_flowinfo;
index e140f69163b693b386bdc709719b4efc3d8a30b0..a0231e96a578348c21596a3c9d2d0cbe4a9a1249 100644 (file)
@@ -212,6 +212,7 @@ struct irq_data {
  *                               mask. Applies only to affinity managed irqs.
  * IRQD_SINGLE_TARGET          - IRQ allows only a single affinity target
  * IRQD_DEFAULT_TRIGGER_SET    - Expected trigger already been set
+ * IRQD_CAN_RESERVE            - Can use reservation mode
  */
 enum {
        IRQD_TRIGGER_MASK               = 0xf,
@@ -233,6 +234,7 @@ enum {
        IRQD_MANAGED_SHUTDOWN           = (1 << 23),
        IRQD_SINGLE_TARGET              = (1 << 24),
        IRQD_DEFAULT_TRIGGER_SET        = (1 << 25),
+       IRQD_CAN_RESERVE                = (1 << 26),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
        return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
 }
 
+static inline void irqd_set_can_reserve(struct irq_data *d)
+{
+       __irqd_to_state(d) |= IRQD_CAN_RESERVE;
+}
+
+static inline void irqd_clr_can_reserve(struct irq_data *d)
+{
+       __irqd_to_state(d) &= ~IRQD_CAN_RESERVE;
+}
+
+static inline bool irqd_can_reserve(struct irq_data *d)
+{
+       return __irqd_to_state(d) & IRQD_CAN_RESERVE;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
index 39fb3700f7a92aae1a6c3d417f5effbfb93d6494..25b33b66453773cb01509725fa68664c555ffd3f 100644 (file)
@@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq)
 }
 
 static inline void
-irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class)
+irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class,
+                     struct lock_class_key *request_class)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       if (desc)
-               lockdep_set_class(&desc->lock, class);
+       if (desc) {
+               lockdep_set_class(&desc->lock, lock_class);
+               lockdep_set_class(&desc->request_mutex, request_class);
+       }
 }
 
 #ifdef CONFIG_IRQ_PREFLOW_FASTEOI
index a34355d195463f93d3235283978633bf60f0b59d..48c7e86bb5566798aba1b1b132ab242b75b05ea9 100644 (file)
@@ -113,7 +113,7 @@ struct irq_domain_ops {
                     unsigned int nr_irqs, void *arg);
        void (*free)(struct irq_domain *d, unsigned int virq,
                     unsigned int nr_irqs);
-       int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early);
+       int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve);
        void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data);
        int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec,
                         unsigned long *out_hwirq, unsigned int *out_type);
index 46cb57d5eb1361e521f36208f0fef6b31fd071ea..1b3996ff3f16d10158056e45425b81b5e7c5cbd8 100644 (file)
 # define trace_hardirq_enter()                 \
 do {                                           \
        current->hardirq_context++;             \
-       crossrelease_hist_start(XHLOCK_HARD);   \
 } while (0)
 # define trace_hardirq_exit()                  \
 do {                                           \
        current->hardirq_context--;             \
-       crossrelease_hist_end(XHLOCK_HARD);     \
 } while (0)
 # define lockdep_softirq_enter()               \
 do {                                           \
        current->softirq_context++;             \
-       crossrelease_hist_start(XHLOCK_SOFT);   \
 } while (0)
 # define lockdep_softirq_exit()                        \
 do {                                           \
        current->softirq_context--;             \
-       crossrelease_hist_end(XHLOCK_SOFT);     \
 } while (0)
 # define INIT_TRACE_IRQFLAGS   .softirqs_enabled = 1,
 #else
index 2e75dc34bff5cd3e468571918329793eaf7a0911..3251d9c0d3137ee594a8727f4f817bdb2e2d2350 100644 (file)
@@ -475,8 +475,6 @@ enum xhlock_context_t {
 #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
        { .name = (_name), .key = (void *)(_key), }
 
-static inline void crossrelease_hist_start(enum xhlock_context_t c) {}
-static inline void crossrelease_hist_end(enum xhlock_context_t c) {}
 static inline void lockdep_invariant_state(bool force) {}
 static inline void lockdep_init_task(struct task_struct *task) {}
 static inline void lockdep_free_task(struct task_struct *task) {}
index a2a1318a3d0c8be0a1fb3d1a08fcf671ff9d8bee..c3d3f04d8cc689eddf217c0626e71d8c16530db5 100644 (file)
@@ -915,10 +915,10 @@ enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
 #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN        BIT(6)
 
 enum dev_aspm_mode {
-       DEV_ASPM_DISABLE = 0,
        DEV_ASPM_DYNAMIC,
        DEV_ASPM_BACKDOOR,
        DEV_ASPM_STATIC,
+       DEV_ASPM_DISABLE,
 };
 
 /*
index a886b51511abbf146c4c76309aa313e4bbf77dda..1f509d072026d3d62a9e3701ed72bd5ab2d5b5c4 100644 (file)
@@ -556,6 +556,7 @@ struct mlx5_core_sriov {
 };
 
 struct mlx5_irq_info {
+       cpumask_var_t mask;
        char name[MLX5_MAX_IRQ_NAME];
 };
 
@@ -1048,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       enum mlx5_eq_type type);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
-int mlx5_stop_eqs(struct mlx5_core_dev *dev);
+void mlx5_stop_eqs(struct mlx5_core_dev *dev);
 int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
                    unsigned int *irqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
@@ -1164,6 +1165,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
+                                u64 *values,
+                                int num_counters,
+                                size_t *offsets);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
index 38a7577a9ce71fbcf63c21e2911364795842daa8..d44ec5f41d4a04c72b25b4db1d6fb0217f8f1fa1 100644 (file)
@@ -147,7 +147,7 @@ enum {
        MLX5_CMD_OP_ALLOC_Q_COUNTER               = 0x771,
        MLX5_CMD_OP_DEALLOC_Q_COUNTER             = 0x772,
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
-       MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
+       MLX5_CMD_OP_SET_PP_RATE_LIMIT             = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
        MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
        MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
@@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits {
        u8         vxlan_udp_port[0x10];
 };
 
-struct mlx5_ifc_set_rate_limit_out_bits {
+struct mlx5_ifc_set_pp_rate_limit_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
 
@@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits {
        u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_set_rate_limit_in_bits {
+struct mlx5_ifc_set_pp_rate_limit_in_bits {
        u8         opcode[0x10];
        u8         reserved_at_10[0x10];
 
@@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits {
        u8         reserved_at_60[0x20];
 
        u8         rate_limit[0x20];
+
+       u8         reserved_at_a0[0x160];
 };
 
 struct mlx5_ifc_access_register_out_bits {
index cddfaff4d0b70ff112e72bc9125d2c2b45dc5fd2..4fa654e4b5a92e1133cc532347711a64edd9eb93 100644 (file)
@@ -34,9 +34,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
 
 extern struct of_device_id __iommu_of_table;
 
-typedef int (*of_iommu_init_fn)(struct device_node *);
-
-#define IOMMU_OF_DECLARE(name, compat, fn) \
-       _OF_DECLARE(iommu, name, compat, fn, of_iommu_init_fn)
+#define IOMMU_OF_DECLARE(name, compat) OF_DECLARE_1(iommu, name, compat, NULL)
 
 #endif /* __OF_IOMMU_H */
diff --git a/include/linux/pti.h b/include/linux/pti.h
new file mode 100644 (file)
index 0000000..0174883
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _INCLUDE_PTI_H
+#define _INCLUDE_PTI_H
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#include <asm/pti.h>
+#else
+static inline void pti_init(void) { }
+#endif
+
+#endif
index ff3642d267f7f3311a063e2b1e2b2ebd946686e8..94081e9a5010608d81f4a30badef921b69ee5ecc 100644 (file)
@@ -17,7 +17,6 @@ struct sh_eth_plat_data {
        unsigned char mac_addr[ETH_ALEN];
        unsigned no_ether_link:1;
        unsigned ether_link_active_low:1;
-       unsigned needs_init:1;
 };
 
 #endif
index 7b2170bfd6e7dae432478fffdbc70e1408740394..bc6bb325d1bf7c03db223c568b891e5d33dc93ca 100644 (file)
@@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *     for that name.  This appears in the sysfs "modalias" attribute
  *     for driver coldplugging, and in uevents used for hotplugging
  * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
- *     when not using a GPIO line)
+ *     not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
  *
index f442d1a42025925eb4446c6678e3a059a8cfbb39..7cc35921218ecb745634e29d0558749981b4089d 100644 (file)
@@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern unsigned long tick_nohz_get_idle_calls(void);
+extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 #else /* !CONFIG_NO_HZ_COMMON */
index 04af640ea95bd011cdec0101798b0aa7810233cb..2448f9cc48a3120222d29f8110069c626b428543 100644 (file)
@@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j);
 unsigned long round_jiffies_up_relative(unsigned long j);
 
 #ifdef CONFIG_HOTPLUG_CPU
+int timers_prepare_cpu(unsigned int cpu);
 int timers_dead_cpu(unsigned int cpu);
 #else
-#define timers_dead_cpu NULL
+#define timers_prepare_cpu     NULL
+#define timers_dead_cpu                NULL
 #endif
 
 #endif
index 8b8118a7fadbc74a9aa879dc934de0442bb3a013..cb4d92b79cd932eda4e178861d8345683b329bdb 100644 (file)
@@ -3226,7 +3226,6 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN.
  * @WIPHY_FLAG_MESH_AUTH: The device supports mesh authentication by routing
  *     auth frames to userspace. See @NL80211_MESH_SETUP_USERSPACE_AUTH.
- * @WIPHY_FLAG_SUPPORTS_SCHED_SCAN: The device supports scheduled scans.
  * @WIPHY_FLAG_SUPPORTS_FW_ROAM: The device supports roaming feature in the
  *     firmware.
  * @WIPHY_FLAG_AP_UAPSD: The device supports uapsd on AP.
index 0105445cab83d32008b3526794c077f4bfbd9816..8e08b6da72f325bd4a623191e886fb1b746644d7 100644 (file)
@@ -694,9 +694,7 @@ struct tc_cls_matchall_offload {
 };
 
 enum tc_clsbpf_command {
-       TC_CLSBPF_ADD,
-       TC_CLSBPF_REPLACE,
-       TC_CLSBPF_DESTROY,
+       TC_CLSBPF_OFFLOAD,
        TC_CLSBPF_STATS,
 };
 
@@ -705,6 +703,7 @@ struct tc_cls_bpf_offload {
        enum tc_clsbpf_command command;
        struct tcf_exts *exts;
        struct bpf_prog *prog;
+       struct bpf_prog *oldprog;
        const char *name;
        bool exts_integrated;
        u32 gen_flags;
index 2f8f93da5dc2660f4db37c04f8a434809b3120a1..9a5ccf03a59b1e0a4820e2a978c66f1df8a9a82f 100644 (file)
@@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
 void sctp_transport_reset(struct sctp_transport *t);
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
index 9155da42269208b358df8535b14dfd3dba509365..7a7b14e9628a1174e18dda30cd0b5bfd5b32d30d 100644 (file)
@@ -1514,6 +1514,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
        return sk->sk_lock.owned;
 }
 
+static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
+{
+       return sk->sk_lock.owned;
+}
+
 /* no reclassification while locks are held */
 static inline bool sock_allow_reclassification(const struct sock *csk)
 {
index 13223396dc64abdbcd504b2439214efeb5c6def8..f96391e84a8a3161df98a964692e7ce445050944 100644 (file)
@@ -146,7 +146,7 @@ struct vxlanhdr_gpe {
                np_applied:1,
                instance_applied:1,
                version:2,
-reserved_flags2:2;
+               reserved_flags2:2;
 #elif defined(__BIG_ENDIAN_BITFIELD)
        u8      reserved_flags2:2,
                version:2,
index dc28a98ce97ca7d53d67809a0fc718f0b9d622f8..ae35991b5877029b217a2b49fba7f012f193148c 100644 (file)
@@ -1570,6 +1570,9 @@ int xfrm_init_state(struct xfrm_state *x);
 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
 int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
+int xfrm_trans_queue(struct sk_buff *skb,
+                    int (*finish)(struct net *, struct sock *,
+                                  struct sk_buff *));
 int xfrm_output_resume(struct sk_buff *skb, int err);
 int xfrm_output(struct sock *sk, struct sk_buff *skb);
 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
index 758607226bfdd5269d33557b650a31628a59d22f..2cd449328aee37e55de94633d803d73fe3057f9e 100644 (file)
@@ -134,12 +134,12 @@ DECLARE_EVENT_CLASS(clk_parent,
 
        TP_STRUCT__entry(
                __string(        name,           core->name                )
-               __string(        pname,          parent->name              )
+               __string(        pname, parent ? parent->name : "none"     )
        ),
 
        TP_fast_assign(
                __assign_str(name, core->name);
-               __assign_str(pname, parent->name);
+               __assign_str(pname, parent ? parent->name : "none");
        ),
 
        TP_printk("%s %s", __get_str(name), __get_str(pname))
index e4b0b8e099325f2801e4f3af168004c603c23794..2c735a3e66133fc08740b4df6d64919c491c9d1e 100644 (file)
@@ -211,7 +211,7 @@ TRACE_EVENT(kvm_ack_irq,
        { KVM_TRACE_MMIO_WRITE, "write" }
 
 TRACE_EVENT(kvm_mmio,
-       TP_PROTO(int type, int len, u64 gpa, u64 val),
+       TP_PROTO(int type, int len, u64 gpa, void *val),
        TP_ARGS(type, len, gpa, val),
 
        TP_STRUCT__entry(
@@ -225,7 +225,10 @@ TRACE_EVENT(kvm_mmio,
                __entry->type           = type;
                __entry->len            = len;
                __entry->gpa            = gpa;
-               __entry->val            = val;
+               __entry->val            = 0;
+               if (val)
+                       memcpy(&__entry->val, val,
+                              min_t(u32, sizeof(__entry->val), len));
        ),
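
With val passed as a pointer, the tracepoint bounds the copy to sizeof(__entry->val) itself and records 0 for a NULL val. Callers presumably now hand over their data buffer directly:

	/* hypothetical call-site shape: */
	trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, gpa, data);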
 
        TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx",
index 07cccca6cbf1762684152146372e35e1cd758338..ab34c561f26bec42a8ff32cb6f7e911447f03af2 100644 (file)
                tcp_state_name(TCP_CLOSING),            \
                tcp_state_name(TCP_NEW_SYN_RECV))
 
+#define TP_STORE_V4MAPPED(__entry, saddr, daddr)               \
+       do {                                                    \
+               struct in6_addr *pin6;                          \
+                                                               \
+               pin6 = (struct in6_addr *)__entry->saddr_v6;    \
+               ipv6_addr_set_v4mapped(saddr, pin6);            \
+               pin6 = (struct in6_addr *)__entry->daddr_v6;    \
+               ipv6_addr_set_v4mapped(daddr, pin6);            \
+       } while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)          \
+       do {                                                            \
+               if (sk->sk_family == AF_INET6) {                        \
+                       struct in6_addr *pin6;                          \
+                                                                       \
+                       pin6 = (struct in6_addr *)__entry->saddr_v6;    \
+                       *pin6 = saddr6;                                 \
+                       pin6 = (struct in6_addr *)__entry->daddr_v6;    \
+                       *pin6 = daddr6;                                 \
+               } else {                                                \
+                       TP_STORE_V4MAPPED(__entry, saddr, daddr);       \
+               }                                                       \
+       } while (0)
+#else
+#define TP_STORE_ADDRS(__entry, saddr, daddr, saddr6, daddr6)  \
+       TP_STORE_V4MAPPED(__entry, saddr, daddr)
+#endif
+
 /*
  * tcp event with arguments sk and skb
  *
@@ -50,7 +79,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skbaddr = skb;
@@ -65,20 +93,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                             sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -127,7 +143,6 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -141,20 +156,8 @@ DECLARE_EVENT_CLASS(tcp_event_sk,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
@@ -197,7 +200,6 @@ TRACE_EVENT(tcp_set_state,
 
        TP_fast_assign(
                struct inet_sock *inet = inet_sk(sk);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -213,20 +215,8 @@ TRACE_EVENT(tcp_set_state,
                p32 = (__be32 *) __entry->daddr;
                *p32 =  inet->inet_daddr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = sk->sk_v6_rcv_saddr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = sk->sk_v6_daddr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+                              sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c oldstate=%s newstate=%s",
@@ -256,7 +246,6 @@ TRACE_EVENT(tcp_retransmit_synack,
 
        TP_fast_assign(
                struct inet_request_sock *ireq = inet_rsk(req);
-               struct in6_addr *pin6;
                __be32 *p32;
 
                __entry->skaddr = sk;
@@ -271,20 +260,8 @@ TRACE_EVENT(tcp_retransmit_synack,
                p32 = (__be32 *) __entry->daddr;
                *p32 = ireq->ir_rmt_addr;
 
-#if IS_ENABLED(CONFIG_IPV6)
-               if (sk->sk_family == AF_INET6) {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       *pin6 = ireq->ir_v6_loc_addr;
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       *pin6 = ireq->ir_v6_rmt_addr;
-               } else
-#endif
-               {
-                       pin6 = (struct in6_addr *)__entry->saddr_v6;
-                       ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6);
-                       pin6 = (struct in6_addr *)__entry->daddr_v6;
-                       ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6);
-               }
+               TP_STORE_ADDRS(__entry, ireq->ir_loc_addr, ireq->ir_rmt_addr,
+                             ireq->ir_v6_loc_addr, ireq->ir_v6_rmt_addr);
        ),
 
        TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
index 3ee3bf7c85262b034702875b7d356ac7595abea6..144de4d2f385e0b05233162ec425a6763262e038 100644 (file)
@@ -23,6 +23,7 @@
 #define _UAPI_LINUX_IF_ETHER_H
 
 #include <linux/types.h>
+#include <linux/libc-compat.h>
 
 /*
  *     IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
  *     This is an Ethernet frame header.
  */
 
+#if __UAPI_DEF_ETHHDR
 struct ethhdr {
        unsigned char   h_dest[ETH_ALEN];       /* destination eth addr */
        unsigned char   h_source[ETH_ALEN];     /* source ether addr    */
        __be16          h_proto;                /* packet type ID field */
 } __attribute__((packed));
+#endif
 
 
 #endif /* _UAPI_LINUX_IF_ETHER_H */
index 282875cf805657b41eb0b5a797aa77647b889159..fc29efaa918cb76fab92a65890cca1aab3ecf9ba 100644 (file)
 
 /* If we did not see any headers from any supported C libraries,
  * or we are being included in the kernel, then define everything
- * that we need. */
+ * that we need. Check for previous __UAPI_* definitions to give
+ * unsupported C libraries a way to opt out of any kernel definition. */
 #else /* !defined(__GLIBC__) */
 
 /* Definitions for if.h */
+#ifndef __UAPI_DEF_IF_IFCONF
 #define __UAPI_DEF_IF_IFCONF 1
+#endif
+#ifndef __UAPI_DEF_IF_IFMAP
 #define __UAPI_DEF_IF_IFMAP 1
+#endif
+#ifndef __UAPI_DEF_IF_IFNAMSIZ
 #define __UAPI_DEF_IF_IFNAMSIZ 1
+#endif
+#ifndef __UAPI_DEF_IF_IFREQ
 #define __UAPI_DEF_IF_IFREQ 1
+#endif
 /* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS
 #define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+#endif
 /* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 #define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif
 
 /* Definitions for in.h */
+#ifndef __UAPI_DEF_IN_ADDR
 #define __UAPI_DEF_IN_ADDR             1
+#endif
+#ifndef __UAPI_DEF_IN_IPPROTO
 #define __UAPI_DEF_IN_IPPROTO          1
+#endif
+#ifndef __UAPI_DEF_IN_PKTINFO
 #define __UAPI_DEF_IN_PKTINFO          1
+#endif
+#ifndef __UAPI_DEF_IP_MREQ
 #define __UAPI_DEF_IP_MREQ             1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN
 #define __UAPI_DEF_SOCKADDR_IN         1
+#endif
+#ifndef __UAPI_DEF_IN_CLASS
 #define __UAPI_DEF_IN_CLASS            1
+#endif
 
 /* Definitions for in6.h */
+#ifndef __UAPI_DEF_IN6_ADDR
 #define __UAPI_DEF_IN6_ADDR            1
+#endif
+#ifndef __UAPI_DEF_IN6_ADDR_ALT
 #define __UAPI_DEF_IN6_ADDR_ALT                1
+#endif
+#ifndef __UAPI_DEF_SOCKADDR_IN6
 #define __UAPI_DEF_SOCKADDR_IN6                1
+#endif
+#ifndef __UAPI_DEF_IPV6_MREQ
 #define __UAPI_DEF_IPV6_MREQ           1
+#endif
+#ifndef __UAPI_DEF_IPPROTO_V6
 #define __UAPI_DEF_IPPROTO_V6          1
+#endif
+#ifndef __UAPI_DEF_IPV6_OPTIONS
 #define __UAPI_DEF_IPV6_OPTIONS                1
+#endif
+#ifndef __UAPI_DEF_IN6_PKTINFO
 #define __UAPI_DEF_IN6_PKTINFO         1
+#endif
+#ifndef __UAPI_DEF_IP6_MTUINFO
 #define __UAPI_DEF_IP6_MTUINFO         1
+#endif
 
 /* Definitions for ipx.h */
+#ifndef __UAPI_DEF_SOCKADDR_IPX
 #define __UAPI_DEF_SOCKADDR_IPX                        1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEFINITION
 #define __UAPI_DEF_IPX_ROUTE_DEFINITION                1
+#endif
+#ifndef __UAPI_DEF_IPX_INTERFACE_DEFINITION
 #define __UAPI_DEF_IPX_INTERFACE_DEFINITION    1
+#endif
+#ifndef __UAPI_DEF_IPX_CONFIG_DATA
 #define __UAPI_DEF_IPX_CONFIG_DATA             1
+#endif
+#ifndef __UAPI_DEF_IPX_ROUTE_DEF
 #define __UAPI_DEF_IPX_ROUTE_DEF               1
+#endif
 
 /* Definitions for xattr.h */
+#ifndef __UAPI_DEF_XATTR
 #define __UAPI_DEF_XATTR               1
+#endif
 
 #endif /* __GLIBC__ */
 
+/* Definitions for if_ether.h */
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR              1
+#endif
+
 #endif /* _UAPI_LIBC_COMPAT_H */
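
The default-on __UAPI_DEF_ETHHDR guard gives a libc that ships its own struct ethhdr a way to suppress the kernel definition, as the comment above notes for musl. Hypothetical consumer-side usage:

	/* before any UAPI include, in a libc that defines ethhdr itself: */
	#define __UAPI_DEF_ETHHDR 0
	#include <linux/if_ether.h>	/* ETH_ALEN etc., no struct ethhdr */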
index 3fea7709a4412c5e3c7f94daed08576b55bf3bc2..57ccfb32e87f97bad06f7a263ceecc20cd74dc9d 100644 (file)
@@ -36,7 +36,7 @@ enum ip_conntrack_info {
 
 #define NF_CT_STATE_INVALID_BIT                        (1 << 0)
 #define NF_CT_STATE_BIT(ctinfo)                        (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
-#define NF_CT_STATE_UNTRACKED_BIT              (1 << (IP_CT_UNTRACKED + 1))
+#define NF_CT_STATE_UNTRACKED_BIT              (1 << 6)
 
 /* Bitset representing status of connection. */
 enum ip_conntrack_status {
index 4914b93a23f2bdeb066a4048d7ab749456ae3fe3..61f410fd74e4cf4180f7ad5ffa1d996cc1528c91 100644 (file)
@@ -44,3 +44,8 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
+
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+struct resource;
+void arch_xen_balloon_init(struct resource *hostmem_resource);
+#endif
index 2934249fba46746253b89d14f581547ada3d0bad..a9a2e2c86671a18c02c49643151204dd3358157b 100644 (file)
@@ -461,10 +461,15 @@ endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
        bool "CPU isolation"
+       depends on SMP || COMPILE_TEST
+       default y
        help
          Make sure that CPUs running critical tasks are not disturbed by
          any source of "noise" such as unbound workqueues, timers, kthreads...
-         Unbound jobs get offloaded to housekeeping CPUs.
+         Unbound jobs get offloaded to housekeeping CPUs. This is driven by
+         the "isolcpus=" boot parameter.
+
+         Say Y if unsure.
 
 source "kernel/rcu/Kconfig"
 
@@ -1392,6 +1397,13 @@ config BPF_SYSCALL
          Enable the bpf() system call that allows manipulating eBPF
          programs and maps via file descriptors.
 
+config BPF_JIT_ALWAYS_ON
+       bool "Permanently enable BPF JIT and remove BPF interpreter"
+       depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+       help
+         Enables the BPF JIT and removes the BPF interpreter to avoid
+         speculative execution of BPF instructions by the interpreter.
+
 config USERFAULTFD
        bool "Enable userfaultfd() system call"
        select ANON_INODES
index e96e3a14533cda199963fe96b97dc78779c66037..a8100b9548398e8b102052f2c1418b21ea423825 100644 (file)
@@ -75,6 +75,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
+#include <linux/pti.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
@@ -504,6 +505,10 @@ static void __init mm_init(void)
        pgtable_init();
        vmalloc_init();
        ioremap_huge_init();
+       /* Should be run before the first non-init thread is created */
+       init_espfix_bsp();
+       /* Should be run after espfix64 is set up. */
+       pti_init();
 }
 
 asmlinkage __visible void __init start_kernel(void)
@@ -678,10 +683,6 @@ asmlinkage __visible void __init start_kernel(void)
 #ifdef CONFIG_X86
        if (efi_enabled(EFI_RUNTIME_SERVICES))
                efi_enter_virtual_mode();
-#endif
-#ifdef CONFIG_X86_ESPFIX64
-       /* Should be run before the first non-init thread is created */
-       init_espfix_bsp();
 #endif
        thread_stack_cache_init();
        cred_init();
index d15c0ee4d95504a88337498045a84f53a6ae0d15..addf7732fb562f5b0eb51a0bd8894aec81cc8afe 100644 (file)
@@ -102,7 +102,7 @@ static int check_free_space(struct bsd_acct_struct *acct)
 {
        struct kstatfs sbuf;
 
-       if (time_is_before_jiffies(acct->needcheck))
+       if (time_is_after_jiffies(acct->needcheck))
                goto out;
 
        /* May block */
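
time_is_after_jiffies(t) expands to time_after(t, jiffies) and is true while t is still in the future. The old test therefore skipped the free-space check once the deadline had already passed and ran it on every call before that, the inverse of the intent; the fix skips only while acct->needcheck is not yet due. In sketch form:

	/* needcheck was set to jiffies + ACCT_TIMEOUT*HZ at the last check */
	if (time_is_after_jiffies(acct->needcheck))
		goto out;	/* deadline not reached yet: skip statfs */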
index 7c25426d3cf5699ffde545ee9a8d5e254f16f517..aaa319848e7d57422225aa99055b574dc904e36d 100644 (file)
@@ -53,9 +53,10 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 {
        bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
        int numa_node = bpf_map_attr_numa_node(attr);
+       u32 elem_size, index_mask, max_entries;
+       bool unpriv = !capable(CAP_SYS_ADMIN);
        struct bpf_array *array;
        u64 array_size;
-       u32 elem_size;
 
        /* check sanity of attributes */
        if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -72,11 +73,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 
        elem_size = round_up(attr->value_size, 8);
 
+       max_entries = attr->max_entries;
+       index_mask = roundup_pow_of_two(max_entries) - 1;
+
+       if (unpriv)
+               /* round up array size to nearest power of 2,
+                * since cpu will speculate within index_mask limits
+                */
+               max_entries = index_mask + 1;
+
        array_size = sizeof(*array);
        if (percpu)
-               array_size += (u64) attr->max_entries * sizeof(void *);
+               array_size += (u64) max_entries * sizeof(void *);
        else
-               array_size += (u64) attr->max_entries * elem_size;
+               array_size += (u64) max_entries * elem_size;
 
        /* make sure there is no u32 overflow later in round_up() */
        if (array_size >= U32_MAX - PAGE_SIZE)
@@ -86,6 +96,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
        array = bpf_map_area_alloc(array_size, numa_node);
        if (!array)
                return ERR_PTR(-ENOMEM);
+       array->index_mask = index_mask;
+       array->map.unpriv_array = unpriv;
 
        /* copy mandatory map attributes */
        array->map.map_type = attr->map_type;
@@ -121,12 +133,13 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
        if (unlikely(index >= array->map.max_entries))
                return NULL;
 
-       return array->value + array->elem_size * index;
+       return array->value + array->elem_size * (index & array->index_mask);
 }
 
 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct bpf_insn *insn = insn_buf;
        u32 elem_size = round_up(map->value_size, 8);
        const int ret = BPF_REG_0;
@@ -135,7 +148,12 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+       if (map->unpriv_array) {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+               *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+       } else {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+       }
 
        if (is_power_of_2(elem_size)) {
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -157,7 +175,7 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
        if (unlikely(index >= array->map.max_entries))
                return NULL;
 
-       return this_cpu_ptr(array->pptrs[index]);
+       return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
@@ -177,7 +195,7 @@ int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
-       pptr = array->pptrs[index];
+       pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
                off += size;
@@ -225,10 +243,11 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
                return -EEXIST;
 
        if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
-               memcpy(this_cpu_ptr(array->pptrs[index]),
+               memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
                       value, map->value_size);
        else
-               memcpy(array->value + array->elem_size * index,
+               memcpy(array->value +
+                      array->elem_size * (index & array->index_mask),
                       value, map->value_size);
        return 0;
 }
@@ -262,7 +281,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
         */
        size = round_up(map->value_size, 8);
        rcu_read_lock();
-       pptr = array->pptrs[index];
+       pptr = array->pptrs[index & array->index_mask];
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
                off += size;
@@ -613,6 +632,7 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 static u32 array_of_map_gen_lookup(struct bpf_map *map,
                                   struct bpf_insn *insn_buf)
 {
+       struct bpf_array *array = container_of(map, struct bpf_array, map);
        u32 elem_size = round_up(map->value_size, 8);
        struct bpf_insn *insn = insn_buf;
        const int ret = BPF_REG_0;
@@ -621,7 +641,12 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
        *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
        *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-       *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+       if (map->unpriv_array) {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+               *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+       } else {
+               *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+       }
        if (is_power_of_2(elem_size))
                *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
        else
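The mitigation pattern applied throughout this file, reduced to its core (a sketch, not the kernel code itself): size the backing store to a power of two and mask every index, so that even a load executed speculatively past the bounds check stays inside the allocation.

    u32 cap  = roundup_pow_of_two(max_entries); /* allocated entries   */
    u32 mask = cap - 1;                         /* the index_mask      */

    if (index >= max_entries)                   /* architectural check */
            return NULL;
    return base + (u64)elem_size * (index & mask); /* speculation-safe */

The same masking is emitted as an extra BPF_ALU32_IMM(BPF_AND, ...) instruction in the generated lookup paths, but only for unprivileged maps, so privileged users pay no extra cost.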
index 86b50aa26ee80adac9ba7ac52248c64cbea19b26..51ec2dda7f08c6c90af084589bb6d80662c77d12 100644 (file)
@@ -767,6 +767,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 }
 EXPORT_SYMBOL_GPL(__bpf_call_base);
 
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 /**
  *     __bpf_prog_run - run eBPF program on a given context
  *     @ctx: is the data we are operating on
@@ -1317,6 +1318,14 @@ EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
 EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 };
 
+#else
+static unsigned int __bpf_prog_ret0(const void *ctx,
+                                   const struct bpf_insn *insn)
+{
+       return 0;
+}
+#endif
+
 bool bpf_prog_array_compatible(struct bpf_array *array,
                               const struct bpf_prog *fp)
 {
@@ -1364,9 +1373,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
        u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
 
        fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+       fp->bpf_func = __bpf_prog_ret0;
+#endif
 
        /* eBPF JITs can rewrite the program in case constant
         * blinding is active. However, in case of error during
@@ -1376,6 +1389,12 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
         */
        if (!bpf_prog_is_dev_bound(fp->aux)) {
                fp = bpf_int_jit_compile(fp);
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+               if (!fp->jited) {
+                       *err = -ENOTSUPP;
+                       return fp;
+               }
+#endif
        } else {
                *err = bpf_prog_offload_compile(fp);
                if (*err)
index 01aaef1a77c5af164660b0f75ac99e4bd7c55a52..5bb5e49ef4c3831050cf9fa92278a150fde40bfa 100644 (file)
@@ -368,7 +368,45 @@ out:
        putname(pname);
        return ret;
 }
-EXPORT_SYMBOL_GPL(bpf_obj_get_user);
+
+static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
+{
+       struct bpf_prog *prog;
+       int ret = inode_permission(inode, MAY_READ | MAY_WRITE);
+       if (ret)
+               return ERR_PTR(ret);
+
+       if (inode->i_op == &bpf_map_iops)
+               return ERR_PTR(-EINVAL);
+       if (inode->i_op != &bpf_prog_iops)
+               return ERR_PTR(-EACCES);
+
+       prog = inode->i_private;
+
+       ret = security_bpf_prog(prog);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       if (!bpf_prog_get_ok(prog, &type, false))
+               return ERR_PTR(-EINVAL);
+
+       return bpf_prog_inc(prog);
+}
+
+struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
+{
+       struct bpf_prog *prog;
+       struct path path;
+       int ret = kern_path(name, LOOKUP_FOLLOW, &path);
+       if (ret)
+               return ERR_PTR(ret);
+       prog = __get_prog_inode(d_backing_inode(path.dentry), type);
+       if (!IS_ERR(prog))
+               touch_atime(&path);
+       path_put(&path);
+       return prog;
+}
+EXPORT_SYMBOL(bpf_prog_get_type_path);
 
 static void bpf_evict_inode(struct inode *inode)
 {
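A hypothetical in-kernel consumer of the new helper (the pinned path and program type here are made up for illustration); the reference obtained on success must be dropped with bpf_prog_put():

    struct bpf_prog *prog;

    prog = bpf_prog_get_type_path("/sys/fs/bpf/my_prog",
                                  BPF_PROG_TYPE_SOCKET_FILTER);
    if (IS_ERR(prog))
            return PTR_ERR(prog);
    /* ... attach or run the program ... */
    bpf_prog_put(prog);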
index 5ee2e41893d9662641001c1de8d0776fccb590aa..1712d319c2d84292a39cb5c04cd7e5459f6d8174 100644 (file)
@@ -591,8 +591,15 @@ static void sock_map_free(struct bpf_map *map)
 
                write_lock_bh(&sock->sk_callback_lock);
                psock = smap_psock_sk(sock);
-               smap_list_remove(psock, &stab->sock_map[i]);
-               smap_release_sock(psock, sock);
+               /* This check handles a racing sock event that can get the
+                * sk_callback_lock before this case but after the xchg happens,
+                * causing the refcnt to hit zero and the sock user data
+                * (psock) to be NULL and queued for garbage collection.
+                */
+               if (likely(psock)) {
+                       smap_list_remove(psock, &stab->sock_map[i]);
+                       smap_release_sock(psock, sock);
+               }
                write_unlock_bh(&sock->sk_callback_lock);
        }
        rcu_read_unlock();
index 2c4cfeaa8d5e785f16758be08cb8a462766363d9..5cb783fc8224b3c5acb65bae4d79c0ec74a71c7a 100644 (file)
@@ -1057,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
 
-static bool bpf_prog_get_ok(struct bpf_prog *prog,
+bool bpf_prog_get_ok(struct bpf_prog *prog,
                            enum bpf_prog_type *attach_type, bool attach_drv)
 {
        /* not an attachment, just a refcount inc, always allow */
index d4593571c4049b8d046f53f81f8e17911a21e0c9..b414d6b2d47070505f3a60fcd8b58478b293d2ad 100644 (file)
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                break;
        case PTR_TO_STACK:
                pointer_desc = "stack ";
+               /* The stack spill tracking logic in check_stack_write()
+                * and check_stack_read() relies on stack accesses being
+                * aligned.
+                */
+               strict = true;
                break;
        default:
                break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
                                           strict);
 }
 
+/* truncate register to smaller size (in bytes)
+ * must be called with size < BPF_REG_SIZE
+ */
+static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
+{
+       u64 mask;
+
+       /* clear high bits in bit representation */
+       reg->var_off = tnum_cast(reg->var_off, size);
+
+       /* fix arithmetic bounds */
+       mask = ((u64)1 << (size * 8)) - 1;
+       if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
+               reg->umin_value &= mask;
+               reg->umax_value &= mask;
+       } else {
+               reg->umin_value = 0;
+               reg->umax_value = mask;
+       }
+       reg->smin_value = reg->umin_value;
+       reg->smax_value = reg->umax_value;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
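A worked example of the bounds fixup in coerce_reg_to_size() for size = 2, i.e. mask = 0xffff:

    /* bounds [0x10004, 0x10008]: umin & ~mask == umax & ~mask
     * (both 0x10000), so truncation cannot wrap and the masked
     * bounds [0x4, 0x8] are kept.
     *
     * bounds [0xfffe, 0x10002]: the high bits differ, truncation
     * may wrap through zero, so the bounds collapse to the full
     * range [0x0, 0xffff].
     */

In both cases the signed bounds are then rebuilt from the unsigned ones, which is safe because after truncation to fewer than 8 bytes the value is known to be nonnegative in the wider type.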
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
        if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
            regs[value_regno].type == SCALAR_VALUE) {
                /* b/h/w load zero-extends, mark upper bits as known 0 */
-               regs[value_regno].var_off =
-                       tnum_cast(regs[value_regno].var_off, size);
-               __update_reg_bounds(&regs[value_regno]);
+               coerce_reg_to_size(&regs[value_regno], size);
        }
        return err;
 }
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
                tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
                verbose(env, "invalid variable stack read R%d var_off=%s\n",
                        regno, tn_buf);
+               return -EACCES;
        }
        off = regs[regno].off + regs[regno].var_off.value;
        if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
                return -EINVAL;
        }
 
+       /* With LD_ABS/IND some JITs save/restore skb from r1. */
        changes_data = bpf_helper_changes_pkt_data(fn->func);
+       if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
+               verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
+                       func_id_name(func_id), func_id);
+               return -EINVAL;
+       }
 
        memset(&meta, 0, sizeof(meta));
        meta.pkt_access = fn->pkt_access;
@@ -1696,6 +1729,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
        err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
        if (err)
                return err;
+       if (func_id == BPF_FUNC_tail_call) {
+               if (meta.map_ptr == NULL) {
+                       verbose(env, "verifier bug\n");
+                       return -EINVAL;
+               }
+               env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
+       }
        err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
        if (err)
                return err;
@@ -1766,14 +1806,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
        return 0;
 }
 
-static void coerce_reg_to_32(struct bpf_reg_state *reg)
-{
-       /* clear high 32 bits */
-       reg->var_off = tnum_cast(reg->var_off, 4);
-       /* Update bounds */
-       __update_reg_bounds(reg);
-}
-
 static bool signed_add_overflows(s64 a, s64 b)
 {
        /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1826,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
        return res > a;
 }
 
+static bool check_reg_sane_offset(struct bpf_verifier_env *env,
+                                 const struct bpf_reg_state *reg,
+                                 enum bpf_reg_type type)
+{
+       bool known = tnum_is_const(reg->var_off);
+       s64 val = reg->var_off.value;
+       s64 smin = reg->smin_value;
+
+       if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
+               verbose(env, "math between %s pointer and %lld is not allowed\n",
+                       reg_type_str[type], val);
+               return false;
+       }
+
+       if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "%s pointer offset %d is not allowed\n",
+                       reg_type_str[type], reg->off);
+               return false;
+       }
+
+       if (smin == S64_MIN) {
+               verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
+                       reg_type_str[type]);
+               return false;
+       }
+
+       if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
+               verbose(env, "value %lld makes %s pointer be out of bounds\n",
+                       smin, reg_type_str[type]);
+               return false;
+       }
+
+       return true;
+}
+
 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
  * Caller should also handle BPF_MOV case separately.
  * If we return -EACCES, caller may want to try again treating pointer as a
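An example of what the new check rejects (BPF program sketched as comments; the register choice is illustrative):

    /* r2 = *(u64 *)(r0 + 0)   ; scalar loaded from a map value,
     *                         ; fully unbounded: smin_value == S64_MIN
     * r10 is the frame pointer (type PTR_TO_STACK, printed as "fp")
     * r2 += r10               ; scalar += pointer
     * -> "math between fp pointer and register with unbounded
     *     min value is not allowed"
     */

Previously such arithmetic was only diagnosed for unprivileged programs; with the allow_ptr_leaks special cases removed below, the verifier applies one set of rules to everyone.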
@@ -1830,29 +1897,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
        if (BPF_CLASS(insn->code) != BPF_ALU64) {
                /* 32-bit ALU ops on pointers produce (meaningless) scalars */
-               if (!env->allow_ptr_leaks)
-                       verbose(env,
-                               "R%d 32-bit pointer arithmetic prohibited\n",
-                               dst);
+               verbose(env,
+                       "R%d 32-bit pointer arithmetic prohibited\n",
+                       dst);
                return -EACCES;
        }
 
        if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == CONST_PTR_TO_MAP) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+                       dst);
                return -EACCES;
        }
        if (ptr_reg->type == PTR_TO_PACKET_END) {
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
-                               dst);
+               verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+                       dst);
                return -EACCES;
        }
 
@@ -1862,6 +1925,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        dst_reg->type = ptr_reg->type;
        dst_reg->id = ptr_reg->id;
 
+       if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
+           !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
+               return -EINVAL;
+
        switch (opcode) {
        case BPF_ADD:
                /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1982,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                if (dst_reg == off_reg) {
                        /* scalar -= pointer.  Creates an unknown scalar */
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d tried to subtract pointer from scalar\n",
-                                       dst);
+                       verbose(env, "R%d tried to subtract pointer from scalar\n",
+                               dst);
                        return -EACCES;
                }
                /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1991,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
                 * be able to deal with it.
                 */
                if (ptr_reg->type == PTR_TO_STACK) {
-                       if (!env->allow_ptr_leaks)
-                               verbose(env, "R%d subtraction from stack pointer prohibited\n",
-                                       dst);
+                       verbose(env, "R%d subtraction from stack pointer prohibited\n",
+                               dst);
                        return -EACCES;
                }
                if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2041,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_AND:
        case BPF_OR:
        case BPF_XOR:
-               /* bitwise ops on pointers are troublesome, prohibit for now.
-                * (However, in principle we could allow some cases, e.g.
-                * ptr &= ~3 which would reduce min_value by 3.)
-                */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               /* bitwise ops on pointers are troublesome, prohibit. */
+               verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        default:
                /* other operators (e.g. MUL,LSH) produce non-pointer results */
-               if (!env->allow_ptr_leaks)
-                       verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
-                               dst, bpf_alu_string[opcode >> 4]);
+               verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
+                       dst, bpf_alu_string[opcode >> 4]);
                return -EACCES;
        }
 
+       if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
+               return -EINVAL;
+
        __update_reg_bounds(dst_reg);
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
 }
 
+/* WARNING: This function does calculations on 64-bit values, but the actual
+ * execution may occur on 32-bit values. Therefore, things like bitshifts
+ * need extra checks in the 32-bit case.
+ */
 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                                      struct bpf_insn *insn,
                                      struct bpf_reg_state *dst_reg,
@@ -2008,12 +2075,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        bool src_known, dst_known;
        s64 smin_val, smax_val;
        u64 umin_val, umax_val;
+       u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 
-       if (BPF_CLASS(insn->code) != BPF_ALU64) {
-               /* 32-bit ALU ops are (32,32)->64 */
-               coerce_reg_to_32(dst_reg);
-               coerce_reg_to_32(&src_reg);
-       }
        smin_val = src_reg.smin_value;
        smax_val = src_reg.smax_value;
        umin_val = src_reg.umin_value;
@@ -2021,6 +2084,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
        src_known = tnum_is_const(src_reg.var_off);
        dst_known = tnum_is_const(dst_reg->var_off);
 
+       if (!src_known &&
+           opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
+               __mark_reg_unknown(dst_reg);
+               return 0;
+       }
+
        switch (opcode) {
        case BPF_ADD:
                if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2218,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_LSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
@@ -2177,27 +2246,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                __update_reg_bounds(dst_reg);
                break;
        case BPF_RSH:
-               if (umax_val > 63) {
-                       /* Shifts greater than 63 are undefined.  This includes
-                        * shifts by a negative number.
+               if (umax_val >= insn_bitness) {
+                       /* Shifts greater than 31 or 63 are undefined.
+                        * This includes shifts by a negative number.
                         */
                        mark_reg_unknown(env, regs, insn->dst_reg);
                        break;
                }
-               /* BPF_RSH is an unsigned shift, so make the appropriate casts */
-               if (dst_reg->smin_value < 0) {
-                       if (umin_val) {
-                               /* Sign bit will be cleared */
-                               dst_reg->smin_value = 0;
-                       } else {
-                               /* Lost sign bit information */
-                               dst_reg->smin_value = S64_MIN;
-                               dst_reg->smax_value = S64_MAX;
-                       }
-               } else {
-                       dst_reg->smin_value =
-                               (u64)(dst_reg->smin_value) >> umax_val;
-               }
+               /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
+                * be negative, then either:
+                * 1) src_reg might be zero, so the sign bit of the result is
+                *    unknown, so we lose our signed bounds
+                * 2) it's known negative, thus the unsigned bounds capture the
+                *    signed bounds
+                * 3) the signed bounds cross zero, so they tell us nothing
+                *    about the result
+                * If the value in dst_reg is known nonnegative, then again the
+                * unsigned bounds capture the signed bounds.
+                * Thus, in all cases it suffices to blow away our signed bounds
+                * and rely on inferring new ones from the unsigned bounds and
+                * var_off of the result.
+                */
+               dst_reg->smin_value = S64_MIN;
+               dst_reg->smax_value = S64_MAX;
                if (src_known)
                        dst_reg->var_off = tnum_rshift(dst_reg->var_off,
                                                       umin_val);
@@ -2213,6 +2284,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
                break;
        }
 
+       if (BPF_CLASS(insn->code) != BPF_ALU64) {
+               /* 32-bit ALU ops are (32,32)->32 */
+               coerce_reg_to_size(dst_reg, 4);
+               coerce_reg_to_size(&src_reg, 4);
+       }
+
        __reg_deduce_bounds(dst_reg);
        __reg_bound_offset(dst_reg);
        return 0;
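A concrete case behind the comment above, showing why the unsigned shift cannot preserve useful signed bounds when the value may be negative:

    /* dst known to lie in the signed interval [-2, 2], shift by 1:
     *   (u64)-2 >> 1 == 0x7fffffffffffffff   (huge positive)
     *   (u64) 2 >> 1 == 0x1
     * no signed interval usefully brackets both results, so
     * smin/smax are reset to [S64_MIN, S64_MAX] and tighter bounds
     * are re-derived from umin/umax and var_off afterwards.
     */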
@@ -2227,7 +2304,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
        struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
        struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
        u8 opcode = BPF_OP(insn->code);
-       int rc;
 
        dst_reg = &regs[insn->dst_reg];
        src_reg = NULL;
@@ -2238,43 +2314,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                if (src_reg->type != SCALAR_VALUE) {
                        if (dst_reg->type != SCALAR_VALUE) {
                                /* Combining two pointers by any ALU op yields
-                                * an arbitrary scalar.
+                                * an arbitrary scalar. Disallow all math except
+                                * pointer subtraction
                                 */
-                               if (!env->allow_ptr_leaks) {
-                                       verbose(env, "R%d pointer %s pointer prohibited\n",
-                                               insn->dst_reg,
-                                               bpf_alu_string[opcode >> 4]);
-                                       return -EACCES;
+                               if (opcode == BPF_SUB) {
+                                       mark_reg_unknown(env, regs, insn->dst_reg);
+                                       return 0;
                                }
-                               mark_reg_unknown(env, regs, insn->dst_reg);
-                               return 0;
+                               verbose(env, "R%d pointer %s pointer prohibited\n",
+                                       insn->dst_reg,
+                                       bpf_alu_string[opcode >> 4]);
+                               return -EACCES;
                        } else {
                                /* scalar += pointer
                                 * This is legal, but we have to reverse our
                                 * src/dest handling in computing the range
                                 */
-                               rc = adjust_ptr_min_max_vals(env, insn,
-                                                            src_reg, dst_reg);
-                               if (rc == -EACCES && env->allow_ptr_leaks) {
-                                       /* scalar += unknown scalar */
-                                       __mark_reg_unknown(&off_reg);
-                                       return adjust_scalar_min_max_vals(
-                                                       env, insn,
-                                                       dst_reg, off_reg);
-                               }
-                               return rc;
+                               return adjust_ptr_min_max_vals(env, insn,
+                                                              src_reg, dst_reg);
                        }
                } else if (ptr_reg) {
                        /* pointer += scalar */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    dst_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += scalar */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, *src_reg);
-                       }
-                       return rc;
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      dst_reg, src_reg);
                }
        } else {
                /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2345,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                off_reg.type = SCALAR_VALUE;
                __mark_reg_known(&off_reg, insn->imm);
                src_reg = &off_reg;
-               if (ptr_reg) { /* pointer += K */
-                       rc = adjust_ptr_min_max_vals(env, insn,
-                                                    ptr_reg, src_reg);
-                       if (rc == -EACCES && env->allow_ptr_leaks) {
-                               /* unknown scalar += K */
-                               __mark_reg_unknown(dst_reg);
-                               return adjust_scalar_min_max_vals(
-                                               env, insn, dst_reg, off_reg);
-                       }
-                       return rc;
-               }
+               if (ptr_reg) /* pointer += K */
+                       return adjust_ptr_min_max_vals(env, insn,
+                                                      ptr_reg, src_reg);
        }
 
        /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2444,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
                                        return -EACCES;
                                }
                                mark_reg_unknown(env, regs, insn->dst_reg);
-                               /* high 32 bits are known zero. */
-                               regs[insn->dst_reg].var_off = tnum_cast(
-                                               regs[insn->dst_reg].var_off, 4);
-                               __update_reg_bounds(&regs[insn->dst_reg]);
+                               coerce_reg_to_size(&regs[insn->dst_reg], 4);
                        }
                } else {
                        /* case: R = imm
                         * remember the value we stored into this reg
                         */
                        regs[insn->dst_reg].type = SCALAR_VALUE;
-                       __mark_reg_known(regs + insn->dst_reg, insn->imm);
+                       if (BPF_CLASS(insn->code) == BPF_ALU64) {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                insn->imm);
+                       } else {
+                               __mark_reg_known(regs + insn->dst_reg,
+                                                (u32)insn->imm);
+                       }
                }
 
        } else if (opcode > BPF_END) {
@@ -3431,15 +3488,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
                        return range_within(rold, rcur) &&
                               tnum_in(rold->var_off, rcur->var_off);
                } else {
-                       /* if we knew anything about the old value, we're not
-                        * equal, because we can't know anything about the
-                        * scalar value of the pointer in the new value.
+                       /* We're trying to use a pointer in place of a scalar.
+                        * Even if the scalar was unbounded, this could lead to
+                        * pointer leaks because scalars are allowed to leak
+                        * while pointers are not. We could make this safe in
+                        * special cases if root is calling us, but it's
+                        * probably not worth the hassle.
                         */
-                       return rold->umin_value == 0 &&
-                              rold->umax_value == U64_MAX &&
-                              rold->smin_value == S64_MIN &&
-                              rold->smax_value == S64_MAX &&
-                              tnum_is_unknown(rold->var_off);
+                       return false;
                }
        case PTR_TO_MAP_VALUE:
                /* If the new min/max/var_off satisfy the old ones and
@@ -4407,6 +4463,35 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                         */
                        insn->imm = 0;
                        insn->code = BPF_JMP | BPF_TAIL_CALL;
+
+                       /* instead of changing every JIT dealing with tail_call
+                        * emit two extra insns:
+                        * if (index >= max_entries) goto out;
+                        * index &= array->index_mask;
+                        * to avoid out-of-bounds cpu speculation
+                        */
+                       map_ptr = env->insn_aux_data[i + delta].map_ptr;
+                       if (map_ptr == BPF_MAP_PTR_POISON) {
+                               verbose(env, "tail_call abusing map_ptr\n");
+                               return -EINVAL;
+                       }
+                       if (!map_ptr->unpriv_array)
+                               continue;
+                       insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
+                                                 map_ptr->max_entries, 2);
+                       insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
+                                                   container_of(map_ptr,
+                                                                struct bpf_array,
+                                                                map)->index_mask);
+                       insn_buf[2] = *insn;
+                       cnt = 3;
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
                        continue;
                }
 
index 024085daab1aede5958235b0663c19ec667b5836..a2c05d2476ac5fd3d530ba64938cd85be52fece2 100644 (file)
@@ -123,7 +123,11 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
         */
        do {
                css_task_iter_start(&from->self, 0, &it);
-               task = css_task_iter_next(&it);
+
+               do {
+                       task = css_task_iter_next(&it);
+               } while (task && (task->flags & PF_EXITING));
+
                if (task)
                        get_task_struct(task);
                css_task_iter_end(&it);
index 0b1ffe147f240c39726d79505c4e02e8fe40cd47..2cf06c274e4ca6cc66194f4ce0fcb9ad0ad82f88 100644 (file)
@@ -1397,7 +1397,7 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
                         cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
                         cft->name);
        else
-               strncpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
+               strlcpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
        return buf;
 }
 
@@ -1864,9 +1864,9 @@ void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts)
 
        root->flags = opts->flags;
        if (opts->release_agent)
-               strcpy(root->release_agent_path, opts->release_agent);
+               strlcpy(root->release_agent_path, opts->release_agent, PATH_MAX);
        if (opts->name)
-               strcpy(root->name, opts->name);
+               strlcpy(root->name, opts->name, MAX_CGROUP_ROOT_NAMELEN);
        if (opts->cpuset_clone_children)
                set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
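The strncpy()-to-strlcpy() conversions close a quiet bug: strncpy() does not NUL-terminate the destination when the source is at least as long as the buffer. A sketch:

    char buf[8];

    strncpy(buf, "0123456789", sizeof(buf)); /* buf = "01234567", no NUL */
    strlcpy(buf, "0123456789", sizeof(buf)); /* buf = "0123456" + NUL    */

strlcpy() always terminates, copying at most size - 1 characters.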
@@ -4125,26 +4125,24 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
 
 static void css_task_iter_advance(struct css_task_iter *it)
 {
-       struct list_head *l = it->task_pos;
+       struct list_head *next;
 
        lockdep_assert_held(&css_set_lock);
-       WARN_ON_ONCE(!l);
-
 repeat:
        /*
         * Advance iterator to find next entry.  cset->tasks is consumed
         * first and then ->mg_tasks.  After ->mg_tasks, we move onto the
         * next cset.
         */
-       l = l->next;
+       next = it->task_pos->next;
 
-       if (l == it->tasks_head)
-               l = it->mg_tasks_head->next;
+       if (next == it->tasks_head)
+               next = it->mg_tasks_head->next;
 
-       if (l == it->mg_tasks_head)
+       if (next == it->mg_tasks_head)
                css_task_iter_advance_css_set(it);
        else
-               it->task_pos = l;
+               it->task_pos = next;
 
        /* if PROCS, skip over tasks which aren't group leaders */
        if ((it->flags & CSS_TASK_ITER_PROCS) && it->task_pos &&
index 41376c3ac93b06c8163d6d764fc9a015bafdcdb4..53f7dc65f9a3b917c1c347834257cf26521eff95 100644 (file)
@@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map =
        STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
 
 
-static void inline cpuhp_lock_acquire(bool bringup)
+static inline void cpuhp_lock_acquire(bool bringup)
 {
        lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 }
 
-static void inline cpuhp_lock_release(bool bringup)
+static inline void cpuhp_lock_release(bool bringup)
 {
        lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
 }
 #else
 
-static void inline cpuhp_lock_acquire(bool bringup) { }
-static void inline cpuhp_lock_release(bool bringup) { }
+static inline void cpuhp_lock_acquire(bool bringup) { }
+static inline void cpuhp_lock_release(bool bringup) { }
 
 #endif
 
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
         * before blk_mq_queue_reinit_notify() from notify_dead(),
         * otherwise a RCU stall occurs.
         */
-       [CPUHP_TIMERS_DEAD] = {
+       [CPUHP_TIMERS_PREPARE] = {
                .name                   = "timers:dead",
-               .startup.single         = NULL,
+               .startup.single         = timers_prepare_cpu,
                .teardown.single        = timers_dead_cpu,
        },
        /* Kicks the plugged cpu into life */
index b3663896278ed71f854963024caa6549b6a57935..4f63597c824dfa81542a38268f9ec339f7fe0c47 100644 (file)
@@ -410,7 +410,7 @@ static int __init crash_save_vmcoreinfo_init(void)
        VMCOREINFO_SYMBOL(contig_page_data);
 #endif
 #ifdef CONFIG_SPARSEMEM
-       VMCOREINFO_SYMBOL(mem_section);
+       VMCOREINFO_SYMBOL_ARRAY(mem_section);
        VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
        VMCOREINFO_STRUCT_SIZE(mem_section);
        VMCOREINFO_OFFSET(mem_section, section_mem_map);
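The macro distinction matters because mem_section was recently converted from a statically sized array to a dynamically allocated pointer. Roughly (a sketch of the two macros' effect):

    /* VMCOREINFO_SYMBOL(name)       records (unsigned long)&name
     * VMCOREINFO_SYMBOL_ARRAY(name) records (unsigned long)name
     *
     * For an array, &mem_section == mem_section, so either works.
     * For "struct mem_section **mem_section" only the latter hands
     * makedumpfile the address of the actual section roots rather
     * than the address of the pointer variable.
     */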
index df0c91d5606c2fdd80ea7bb42a2deea3c83eec4d..995453d9fb5529d0e210538cf27943839ec67721 100644 (file)
@@ -1763,3 +1763,4 @@ __weak void abort(void)
        /* if that doesn't kill us, halt */
        panic("Oops failed to kill thread");
 }
+EXPORT_SYMBOL(abort);
index 432eadf6b58c18d9de6a3d09f3fef36089b4b5a2..2295fc69717f6c3d877ef3cac15b55336d7746c6 100644 (file)
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        goto out;
        }
        /* a new mm has just been created */
-       arch_dup_mmap(oldmm, mm);
-       retval = 0;
+       retval = arch_dup_mmap(oldmm, mm);
 out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
index 17f05ef8f575f996f87e614a914bbe21d537f949..e4d3819a91cc7d7bda5416bfd5aaba99a5576cce 100644 (file)
 
 static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
 {
+       static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+       if (!__ratelimit(&ratelimit))
+               return;
+
        printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
                irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
        printk("->handle_irq():  %p, ", desc->handle_irq);
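The ratelimit idiom used here, in isolation (a minimal sketch): DEFINE_RATELIMIT_STATE(name, interval, burst) permits at most burst passes per interval, and __ratelimit() returns nonzero when the caller may proceed.

    #include <linux/ratelimit.h>

    static DEFINE_RATELIMIT_STATE(rs, 5 * HZ, 5); /* 5 messages per 5 s */

    if (__ratelimit(&rs))
            printk("noisy diagnostic\n");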
index 7f608ac3965379fc112d85051fd80325b2ec84ce..acfaaef8672ad2b1d2419bd7522f5556ebb1ae50 100644 (file)
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
        BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
        BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
        BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+       BIT_MASK_DESCR(IRQD_CAN_RESERVE),
 
        BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
 
index c26c5bb6b491f75f76f1190cdc21989f79d17e09..508c03dfef254b9dc8c3a7a1e3e7307d3a7201b3 100644 (file)
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
 EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
 
 /*
- * Separate lockdep class for interrupt chip which can nest irq_desc
- * lock.
+ * Separate lockdep classes for interrupt chip which can nest irq_desc
+ * lock and request mutex.
  */
 static struct lock_class_key irq_nested_lock_class;
+static struct lock_class_key irq_nested_request_class;
 
 /*
  * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
        set_bit(idx, &gc->installed);
 
        if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
-               irq_set_lockdep_class(virq, &irq_nested_lock_class);
+               irq_set_lockdep_class(virq, &irq_nested_lock_class,
+                                     &irq_nested_request_class);
 
        if (chip->irq_calc_mask)
                chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
                        continue;
 
                if (flags & IRQ_GC_INIT_NESTED_LOCK)
-                       irq_set_lockdep_class(i, &irq_nested_lock_class);
+                       irq_set_lockdep_class(i, &irq_nested_lock_class,
+                                             &irq_nested_request_class);
 
                if (!(flags & IRQ_GC_NO_MASK)) {
                        struct irq_data *d = irq_get_irq_data(i);
index 07d08ca701ec4627b558d0435c54cf6de7e147d2..ab19371eab9b8e1e9a7789b882ed4a308883b555 100644 (file)
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
 #endif /* !CONFIG_GENERIC_PENDING_IRQ */
 
 #if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
-static inline int irq_domain_activate_irq(struct irq_data *data, bool early)
+static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
 {
        irqd_set_activated(data);
        return 0;
index 4f4f60015e8ab4196ef1df5107f04beda37d4c6c..62068ad46930dd12088081df5233dffdfee0d7f4 100644 (file)
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
        }
 }
 
-static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
+static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
 {
        int ret = 0;
 
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 
                if (irqd->parent_data)
                        ret = __irq_domain_activate_irq(irqd->parent_data,
-                                                       early);
+                                                       reserve);
                if (!ret && domain->ops->activate) {
-                       ret = domain->ops->activate(domain, irqd, early);
+                       ret = domain->ops->activate(domain, irqd, reserve);
                        /* Rollback in case of error */
                        if (ret && irqd->parent_data)
                                __irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
 /**
  * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
  *                          interrupt
- * @irq_data:  outermost irq_data associated with interrupt
+ * @irq_data:  Outermost irq_data associated with interrupt
+ * @reserve:   If set only reserve an interrupt vector instead of assigning one
  *
  * This is the second step to call domain_ops->activate to program interrupt
  * controllers, so the interrupt could actually get delivered.
  */
-int irq_domain_activate_irq(struct irq_data *irq_data, bool early)
+int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
 {
        int ret = 0;
 
        if (!irqd_is_activated(irq_data))
-               ret = __irq_domain_activate_irq(irq_data, early);
+               ret = __irq_domain_activate_irq(irq_data, reserve);
        if (!ret)
                irqd_set_activated(irq_data);
        return ret;
index edb987b2c58dc1553342b5a87c91335b42888e8b..2f3c4f5382cc6bad8daf528146cf613d9a1ac3e6 100644 (file)
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
        return ret;
 }
 
+/*
+ * Carefully check whether the device can use reservation mode. If
+ * reservation mode is enabled then the early activation will assign a
+ * dummy vector to the device. If the PCI/MSI device does not support
+ * masking of the entry then this can result in spurious interrupts when
+ * the device driver is not absolutely careful. But even then a malfunction
+ * of the hardware could result in a spurious interrupt on the dummy vector
+ * and render the device unusable. If the entry can be masked then the core
+ * logic will prevent the spurious interrupt and reservation mode can be
+ * used. For now reservation mode is restricted to PCI/MSI.
+ */
+static bool msi_check_reservation_mode(struct irq_domain *domain,
+                                      struct msi_domain_info *info,
+                                      struct device *dev)
+{
+       struct msi_desc *desc;
+
+       if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
+               return false;
+
+       if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
+               return false;
+
+       if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
+               return false;
+
+       /*
+        * Checking the first MSI descriptor is sufficient. MSIX supports
+        * masking and MSI does so when the maskbit is set.
+        */
+       desc = first_msi_entry(dev);
+       return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+}
+
 /**
  * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
  * @domain:    The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
 {
        struct msi_domain_info *info = domain->host_data;
        struct msi_domain_ops *ops = info->ops;
-       msi_alloc_info_t arg;
+       struct irq_data *irq_data;
        struct msi_desc *desc;
+       msi_alloc_info_t arg;
        int i, ret, virq;
+       bool can_reserve;
 
        ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
        if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
        if (ops->msi_finish)
                ops->msi_finish(&arg, 0);
 
+       can_reserve = msi_check_reservation_mode(domain, info, dev);
+
        for_each_msi_entry(desc, dev) {
                virq = desc->irq;
                if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
                 * the MSI entries before the PCI layer enables MSI in the
                 * card. Otherwise the card latches a random msi message.
                 */
-               if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
-                       struct irq_data *irq_data;
+               if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
+                       continue;
 
+               irq_data = irq_domain_get_irq_data(domain, desc->irq);
+               if (!can_reserve)
+                       irqd_clr_can_reserve(irq_data);
+               ret = irq_domain_activate_irq(irq_data, can_reserve);
+               if (ret)
+                       goto cleanup;
+       }
+
+       /*
+        * If these interrupts use reservation mode, clear the activated bit
+        * so request_irq() will assign the final vector.
+        */
+       if (can_reserve) {
+               for_each_msi_entry(desc, dev) {
                        irq_data = irq_domain_get_irq_data(domain, desc->irq);
-                       ret = irq_domain_activate_irq(irq_data, true);
-                       if (ret)
-                               goto cleanup;
-                       if (info->flags & MSI_FLAG_MUST_REACTIVATE)
-                               irqd_clr_activated(irq_data);
+                       irqd_clr_activated(irq_data);
                }
        }
        return 0;
index b13b624e2c4902c67d2ae789c7af8fb2d6269cd8..1e8bb6550ec4bf61c367806dc31010e8a25f47b9 100644 (file)
@@ -193,10 +193,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
        }
 
        if (unlikely(is_child_reaper(pid))) {
-               if (pid_ns_prepare_proc(ns)) {
-                       disable_pid_allocation(ns);
+               if (pid_ns_prepare_proc(ns))
                        goto out_free;
-               }
        }
 
        get_pid_ns(ns);
@@ -226,6 +224,10 @@ out_free:
        while (++i <= ns->level)
                idr_remove(&ns->idr, (pid->numbers + i)->nr);
 
+       /* On failure to allocate the first pid, reset the state */
+       if (ns->pid_allocated == PIDNS_ADDING)
+               idr_set_cursor(&ns->idr, 0);
+
        spin_unlock_irq(&pidmap_lock);
 
        kmem_cache_free(ns->pid_cachep, pid);
index 2ddaec40956f7cf1356e17404e758b9a11924c8d..0926aef10dadc26d73959b5d7a736e247320a7de 100644 (file)
@@ -34,11 +34,6 @@ void complete(struct completion *x)
 
        spin_lock_irqsave(&x->wait.lock, flags);
 
-       /*
-        * Perform commit of crossrelease here.
-        */
-       complete_release_commit(x);
-
        if (x->done != UINT_MAX)
                x->done++;
        __wake_up_locked(&x->wait, TASK_NORMAL, 1);
index 2f52ec0f1539fce62a099d9fa559df334a19869c..d6717a3331a1b21bd1be02f034596ab293e8ceac 100644 (file)
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
 #ifdef CONFIG_NO_HZ_COMMON
 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 {
-       unsigned long idle_calls = tick_nohz_get_idle_calls();
+       unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
        bool ret = idle_calls == sg_cpu->saved_idle_calls;
 
        sg_cpu->saved_idle_calls = idle_calls;
index dd7908743dab696facd9dd32f4399ee952228151..9bcbacba82a8115ddceda7fb26097a23c50d44f5 100644 (file)
@@ -89,7 +89,9 @@ static int membarrier_private_expedited(void)
                rcu_read_unlock();
        }
        if (!fallback) {
+               preempt_disable();
                smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
+               preempt_enable();
                free_cpumask_var(tmpmask);
        }
        cpus_read_unlock();
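The preempt_disable()/preempt_enable() pair is needed because smp_call_function_many() must be called with preemption disabled: it uses smp_processor_id() and per-CPU call data, both of which assume the caller cannot migrate between CPUs mid-call.

    /* without the pair, CONFIG_DEBUG_PREEMPT warns along the lines of:
     * "BUG: using smp_processor_id() in preemptible code"
     */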
index e776fc8cc1df3bc6dd09e7722250f87f183fb5ef..f6b5f19223d6cb78efb3179f6dc0fd15383aeded 100644 (file)
@@ -95,6 +95,7 @@ config NO_HZ_FULL
        select RCU_NOCB_CPU
        select VIRT_CPU_ACCOUNTING_GEN
        select IRQ_WORK
+       select CPU_ISOLATION
        help
         Adaptively try to shutdown the tick whenever possible, even when
         the CPU is running tasks. Typically this requires running a single
index 99578f06c8d4fe57cb15cdd44fd58865cae8d0f3..f7cc7abfcf252f0cb6fadb3b9437b9fd096b8baf 100644 (file)
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
        ts->next_tick = 0;
 }
 
+static inline bool local_timer_softirq_pending(void)
+{
+       return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+}
+
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                                         ktime_t now, int cpu)
 {
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
        } while (read_seqretry(&jiffies_lock, seq));
        ts->last_jiffies = basejiff;
 
-       if (rcu_needs_cpu(basemono, &next_rcu) ||
-           arch_needs_cpu() || irq_work_needs_cpu()) {
+       /*
+        * Keep the periodic tick, when RCU, architecture or irq_work
+        * requests it.
+        * Aside from that, check whether the local timer softirq is
+        * pending. If so, it's a bad idea to call get_next_timer_interrupt()
+        * because there is an already expired timer, so it will request
+        * immediate expiry, which rearms the hardware timer with a
+        * minimal delta that brings us back to this place
+        * immediately. Lather, rinse and repeat...
+        */
+       if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+           irq_work_needs_cpu() || local_timer_softirq_pending()) {
                next_tick = basemono + TICK_NSEC;
        } else {
                /*
@@ -985,6 +1000,19 @@ ktime_t tick_nohz_get_sleep_length(void)
        return ts->sleep_length;
 }
 
+/**
+ * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
+ * for a particular CPU.
+ *
+ * Called from the schedutil frequency scaling governor in scheduler context.
+ */
+unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
+{
+       struct tick_sched *ts = tick_get_tick_sched(cpu);
+
+       return ts->idle_calls;
+}
+
 /**
  * tick_nohz_get_idle_calls - return the current idle calls counter value
  *
index ffebcf878fba5d5cf67f5e9abcece25c16259919..89a9e1b4264a07a9a6d69e37759c27ced8b4de37 100644 (file)
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
        struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
 
        /*
-        * If the timer is deferrable and nohz is active then we need to use
-        * the deferrable base.
+        * If the timer is deferrable and NO_HZ_COMMON is set then we need
+        * to use the deferrable base.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-           (tflags & TIMER_DEFERRABLE))
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
                base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
        return base;
 }
@@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
        /*
-        * If the timer is deferrable and nohz is active then we need to use
-        * the deferrable base.
+        * If the timer is deferrable and NO_HZ_COMMON is set then we need
+        * to use the deferrable base.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-           (tflags & TIMER_DEFERRABLE))
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
                base = this_cpu_ptr(&timer_bases[BASE_DEF]);
        return base;
 }
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
        if (!ret && (options & MOD_TIMER_PENDING_ONLY))
                goto out_unlock;
 
-       debug_activate(timer, expires);
-
        new_base = get_target_base(base, timer->flags);
 
        if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
                }
        }
 
+       debug_activate(timer, expires);
+
        timer->expires = expires;
        /*
         * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
        base->must_forward_clk = false;
 
        __run_timers(base);
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
                __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
 }
 
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h
        }
 }
 
+int timers_prepare_cpu(unsigned int cpu)
+{
+       struct timer_base *base;
+       int b;
+
+       for (b = 0; b < NR_BASES; b++) {
+               base = per_cpu_ptr(&timer_bases[b], cpu);
+               base->clk = jiffies;
+               base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->is_idle = false;
+               base->must_forward_clk = true;
+       }
+       return 0;
+}
+
 int timers_dead_cpu(unsigned int cpu)
 {
        struct timer_base *old_base;
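A deferrable timer is one created with the TIMER_DEFERRABLE flag; with the
change above it always lands on the BASE_DEF base when NO_HZ_COMMON is
configured, whether or not nohz has gone active. A usage sketch, with
hypothetical timer and callback names:

	static struct timer_list my_poll_timer;

	static void my_poll_cb(struct timer_list *t)
	{
		/* work that may safely wait until the CPU leaves idle */
	}

	timer_setup(&my_poll_timer, my_poll_cb, TIMER_DEFERRABLE);
	mod_timer(&my_poll_timer, jiffies + HZ);
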
index c87766c1c20446de2d191edda885669d447adecc..9ab18995ff1ebeea0e862f48b43f64924c87e127 100644 (file)
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 /* Missed count stored at end */
 #define RB_MISSED_STORED       (1 << 30)
 
+#define RB_MISSED_FLAGS                (RB_MISSED_EVENTS|RB_MISSED_STORED)
+
 struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
        local_t          commit;        /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
  */
 size_t ring_buffer_page_len(void *page)
 {
-       return local_read(&((struct buffer_data_page *)page)->commit)
+       struct buffer_data_page *bpage = page;
+
+       return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
                + BUF_PAGE_HDR_SIZE;
 }
 
@@ -4400,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
 {
        struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
        struct buffer_data_page *bpage = data;
+       struct page *page = virt_to_page(bpage);
        unsigned long flags;
 
+       /* If the page is still in use someplace else, we can't reuse it */
+       if (page_ref_count(page) > 1)
+               goto out;
+
        local_irq_save(flags);
        arch_spin_lock(&cpu_buffer->lock);
 
@@ -4413,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
        arch_spin_unlock(&cpu_buffer->lock);
        local_irq_restore(flags);
 
+ out:
        free_page((unsigned long)bpage);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
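The commit word of a buffer_data_page packs status flags into its top bits
next to the committed length, which is why ring_buffer_page_len() now masks
them before adding the header size. Schematically, assuming RB_MISSED_EVENTS
is 1 << 31 alongside the RB_MISSED_STORED bit above:

	/*
	 * commit word layout (sketch):
	 *   bit 31   RB_MISSED_EVENTS - events were lost
	 *   bit 30   RB_MISSED_STORED - missed count stored at page end
	 *   29..0    committed data length
	 */
	len = (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
		+ BUF_PAGE_HDR_SIZE;
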
index 59518b8126d04b4f1f62a526571490dda8398e3b..2a8d8a294345a258baca50b8a6b272c1ac0fc658 100644 (file)
@@ -6769,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
-       int entries, size, i;
+       int entries, i;
        ssize_t ret = 0;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -6823,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                        break;
                }
 
-               /*
-                * zero out any left over data, this is going to
-                * user land.
-                */
-               size = ring_buffer_page_len(ref->page);
-               if (size < PAGE_SIZE)
-                       memset(ref->page + size, 0, PAGE_SIZE - size);
-
                page = virt_to_page(ref->page);
 
                spd.pages[i] = page;
@@ -7588,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
        buf->data = alloc_percpu(struct trace_array_cpu);
        if (!buf->data) {
                ring_buffer_free(buf->buffer);
+               buf->buffer = NULL;
                return -ENOMEM;
        }
 
@@ -7611,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
                                    allocate_snapshot ? size : 1);
        if (WARN_ON(ret)) {
                ring_buffer_free(tr->trace_buffer.buffer);
+               tr->trace_buffer.buffer = NULL;
                free_percpu(tr->trace_buffer.data);
+               tr->trace_buffer.data = NULL;
                return -ENOMEM;
        }
        tr->allocated_snapshot = allocate_snapshot;
index c3e84edc47c965d40199b652ba78876cdaa9c70c..2615074d3de5c63e63da31995adc4a1f07f7e9fd 100644 (file)
@@ -346,7 +346,8 @@ static int kobject_uevent_net_broadcast(struct kobject *kobj,
 static void zap_modalias_env(struct kobj_uevent_env *env)
 {
        static const char modalias_prefix[] = "MODALIAS=";
-       int i;
+       size_t len;
+       int i, j;
 
        for (i = 0; i < env->envp_idx;) {
                if (strncmp(env->envp[i], modalias_prefix,
@@ -355,11 +356,18 @@ static void zap_modalias_env(struct kobj_uevent_env *env)
                        continue;
                }
 
-               if (i != env->envp_idx - 1)
-                       memmove(&env->envp[i], &env->envp[i + 1],
-                               sizeof(env->envp[i]) * env->envp_idx - 1);
+               len = strlen(env->envp[i]) + 1;
+
+               if (i != env->envp_idx - 1) {
+                       memmove(env->envp[i], env->envp[i + 1],
+                               env->buflen - len);
+
+                       for (j = i; j < env->envp_idx - 1; j++)
+                               env->envp[j] = env->envp[j + 1] - len;
+               }
 
                env->envp_idx--;
+               env->buflen -= len;
        }
 }
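The rewritten fixup moves both the packed string data and every later envp[]
pointer, since all entries point into one contiguous buffer. The same
compaction as a standalone toy, with hypothetical contents:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[32] = "A=1\0MODALIAS=x\0B=2";      /* packed entries */
		char *envp[3] = { buf, buf + 4, buf + 15 };
		size_t len = strlen(envp[1]) + 1;           /* 11 bytes to drop */

		memmove(envp[1], envp[2], 4);               /* shift "B=2\0" down */
		envp[1] = envp[2] - len;                    /* rebase the pointer */
		printf("%s %s\n", envp[0], envp[1]);        /* prints: A=1 B=2 */
		return 0;
	}
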
 
index 57fd45ab7af1a42aaf290866a2c0bbc5da7f8710..08c60d10747fddc204788e75f24107805702caf4 100644 (file)
@@ -671,7 +671,23 @@ do {                                               \
        **************  MIPS/64  **************
        ***************************************/
 #if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+/*
+ * GCC ends up emitting a __multi3 intrinsic call for MIPS64r6 with the plain C
+ * code below, so we special case MIPS64r6 until the compiler can do better.
+ */
+#define umul_ppmm(w1, w0, u, v)                                                \
+do {                                                                   \
+       __asm__ ("dmulu %0,%1,%2"                                       \
+                : "=d" ((UDItype)(w0))                                 \
+                : "d" ((UDItype)(u)),                                  \
+                  "d" ((UDItype)(v)));                                 \
+       __asm__ ("dmuhu %0,%1,%2"                                       \
+                : "=d" ((UDItype)(w1))                                 \
+                : "d" ((UDItype)(u)),                                  \
+                  "d" ((UDItype)(v)));                                 \
+} while (0)
+#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v) \
 do {                                                                   \
        typedef unsigned int __ll_UTItype __attribute__((mode(TI)));    \
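dmulu and dmuhu yield the low and high halves of the full 128-bit product,
exactly the (w1, w0) pair umul_ppmm promises. On compilers with __int128 the
same computation can be sketched portably:

	#include <stdint.h>

	static void umul128(uint64_t u, uint64_t v, uint64_t *w1, uint64_t *w0)
	{
		unsigned __int128 p = (unsigned __int128)u * v;

		*w0 = (uint64_t)p;          /* low 64 bits, as dmulu  */
		*w1 = (uint64_t)(p >> 64);  /* high 64 bits, as dmuhu */
	}
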
index aa8812ae6776ee31712fe88c58da4048ff9c31e4..f369889e521d7e7f1e237236651532e6a862e0f5 100644 (file)
@@ -435,6 +435,41 @@ loop:
        return 0;
 }
 
+static int bpf_fill_ld_abs_vlan_push_pop2(struct bpf_test *self)
+{
+       struct bpf_insn *insn;
+
+       insn = kmalloc_array(16, sizeof(*insn), GFP_KERNEL);
+       if (!insn)
+               return -ENOMEM;
+
+       /* Since the function address is not a compile-time constant,
+        * we need to assemble this here.
+        */
+       insn[0] = BPF_MOV64_REG(R6, R1);
+       insn[1] = BPF_LD_ABS(BPF_B, 0);
+       insn[2] = BPF_LD_ABS(BPF_H, 0);
+       insn[3] = BPF_LD_ABS(BPF_W, 0);
+       insn[4] = BPF_MOV64_REG(R7, R6);
+       insn[5] = BPF_MOV64_IMM(R6, 0);
+       insn[6] = BPF_MOV64_REG(R1, R7);
+       insn[7] = BPF_MOV64_IMM(R2, 1);
+       insn[8] = BPF_MOV64_IMM(R3, 2);
+       insn[9] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                              bpf_skb_vlan_push_proto.func - __bpf_call_base);
+       insn[10] = BPF_MOV64_REG(R6, R7);
+       insn[11] = BPF_LD_ABS(BPF_B, 0);
+       insn[12] = BPF_LD_ABS(BPF_H, 0);
+       insn[13] = BPF_LD_ABS(BPF_W, 0);
+       insn[14] = BPF_MOV64_IMM(R0, 42);
+       insn[15] = BPF_EXIT_INSN();
+
+       self->u.ptr.insns = insn;
+       self->u.ptr.len = 16;
+
+       return 0;
+}
+
 static int bpf_fill_jump_around_ld_abs(struct bpf_test *self)
 {
        unsigned int len = BPF_MAXINSNS;
@@ -6066,6 +6101,14 @@ static struct bpf_test tests[] = {
                {},
                { {0x1, 0x42 } },
        },
+       {
+               "LD_ABS with helper changing skb data",
+               { },
+               INTERNAL,
+               { 0x34 },
+               { { ETH_HLEN, 42 } },
+               .fill_helper = bpf_fill_ld_abs_vlan_push_pop2,
+       },
 };
 
 static struct net_device dev;
@@ -6207,9 +6250,8 @@ static struct bpf_prog *generate_filter(int which, int *err)
                                return NULL;
                        }
                }
-               /* We don't expect to fail. */
                if (*err) {
-                       pr_cont("FAIL to attach err=%d len=%d\n",
+                       pr_cont("FAIL to prog_create err=%d len=%d\n",
                                *err, fprog.len);
                        return NULL;
                }
@@ -6233,6 +6275,10 @@ static struct bpf_prog *generate_filter(int which, int *err)
                 * checks.
                 */
                fp = bpf_prog_select_runtime(fp, err);
+               if (*err) {
+                       pr_cont("FAIL to select_runtime err=%d\n", *err);
+                       return NULL;
+               }
                break;
        }
 
@@ -6418,8 +6464,8 @@ static __init int test_bpf(void)
                                pass_cnt++;
                                continue;
                        }
-
-                       return err;
+                       err_cnt++;
+                       continue;
                }
 
                pr_cont("jited:%u ", fp->jited);
index 4a720ed4fdafd575df46159a3d51131902d638e4..0d54bcbc8170c7754aef0487eb10b8f7b5773103 100644 (file)
@@ -33,8 +33,9 @@
  * @head: head of timerqueue
  * @node: timer node to be added
  *
- * Adds the timer node to the timerqueue, sorted by the
- * node's expires value.
+ * Adds the timer node to the timerqueue, sorted by the node's expires
+ * value. Returns true if the newly added timer is the first expiring timer in
+ * the queue.
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
  * @head: head of timerqueue
  * @node: timer node to be removed
  *
- * Removes the timer node from the timerqueue.
+ * Removes the timer node from the timerqueue. Returns true if the queue is
+ * not empty after the remove.
  */
 bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 {
index 84b2dc76f140e922e2ed0d7c4d545b4d4ddf496d..b5f940ce0143ba061a183db0df3ef0dc17f57c72 100644 (file)
@@ -882,13 +882,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
        if (IS_ERR(dev))
                return PTR_ERR(dev);
 
-       if (bdi_debug_register(bdi, dev_name(dev))) {
-               device_destroy(bdi_class, dev->devt);
-               return -ENOMEM;
-       }
        cgwb_bdi_register(bdi);
        bdi->dev = dev;
 
+       bdi_debug_register(bdi, dev_name(dev));
        set_bit(WB_registered, &bdi->wb.state);
 
        spin_lock_bh(&bdi_lock);
index d947f3e03b0dff6ab0ee8ac4d714ae361421fd86..56e2d9125ea55a57632feb22c64f2c1a76f5ec6e 100644 (file)
@@ -50,7 +50,7 @@ void __dump_page(struct page *page, const char *reason)
         */
        int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
 
-       pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
+       pr_emerg("page:%px count:%d mapcount:%d mapping:%px index:%#lx",
                  page, page_ref_count(page), mapcount,
                  page->mapping, page_to_pgoff(page));
        if (PageCompound(page))
@@ -69,7 +69,7 @@ void __dump_page(struct page *page, const char *reason)
 
 #ifdef CONFIG_MEMCG
        if (page->mem_cgroup)
-               pr_alert("page->mem_cgroup:%p\n", page->mem_cgroup);
+               pr_alert("page->mem_cgroup:%px\n", page->mem_cgroup);
 #endif
 }
 
@@ -84,10 +84,10 @@ EXPORT_SYMBOL(dump_page);
 
 void dump_vma(const struct vm_area_struct *vma)
 {
-       pr_emerg("vma %p start %p end %p\n"
-               "next %p prev %p mm %p\n"
-               "prot %lx anon_vma %p vm_ops %p\n"
-               "pgoff %lx file %p private_data %p\n"
+       pr_emerg("vma %px start %px end %px\n"
+               "next %px prev %px mm %px\n"
+               "prot %lx anon_vma %px vm_ops %px\n"
+               "pgoff %lx file %px private_data %px\n"
                "flags: %#lx(%pGv)\n",
                vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
                vma->vm_prev, vma->vm_mm,
@@ -100,27 +100,27 @@ EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-       pr_emerg("mm %p mmap %p seqnum %d task_size %lu\n"
+       pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
 #ifdef CONFIG_MMU
-               "get_unmapped_area %p\n"
+               "get_unmapped_area %px\n"
 #endif
                "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
-               "pgd %p mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
+               "pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
                "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
                "pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
                "start_code %lx end_code %lx start_data %lx end_data %lx\n"
                "start_brk %lx brk %lx start_stack %lx\n"
                "arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
-               "binfmt %p flags %lx core_state %p\n"
+               "binfmt %px flags %lx core_state %px\n"
 #ifdef CONFIG_AIO
-               "ioctx_table %p\n"
+               "ioctx_table %px\n"
 #endif
 #ifdef CONFIG_MEMCG
-               "owner %p "
+               "owner %px "
 #endif
-               "exe_file %p\n"
+               "exe_file %px\n"
 #ifdef CONFIG_MMU_NOTIFIER
-               "mmu_notifier_mm %p\n"
+               "mmu_notifier_mm %px\n"
 #endif
 #ifdef CONFIG_NUMA_BALANCING
                "numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
index d73c14294f3a61c2385741b447aa31d203a2bc72..f656ca27f6c20596322ce2aea248b4b04085bb29 100644 (file)
 /* GFP bitmask for kmemleak internal allocations */
 #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                 __GFP_NORETRY | __GFP_NOMEMALLOC | \
-                                __GFP_NOWARN)
+                                __GFP_NOWARN | __GFP_NOFAIL)
 
 /* scanning area inside a memory block */
 struct kmemleak_scan_area {
index ec39f730a0bfeebcd1d04ec34c5955f48a6f5259..58b629bb70de3024aba118000f83f52dd92e6d95 100644 (file)
@@ -166,7 +166,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                next = pmd_addr_end(addr, end);
                if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)
                                && pmd_none_or_clear_bad(pmd))
-                       continue;
+                       goto next;
 
                /* invoke the mmu notifier if the pmd is populated */
                if (!mni_start) {
@@ -188,7 +188,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                                        }
 
                                        /* huge pmd was handled */
-                                       continue;
+                                       goto next;
                                }
                        }
                        /* fall through, the trans huge pmd just split */
@@ -196,6 +196,8 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                this_pages = change_pte_range(vma, pmd, addr, next, newprot,
                                 dirty_accountable, prot_numa);
                pages += this_pages;
+next:
+               cond_resched();
        } while (pmd++, addr = next, addr != end);
 
        if (mni_start)
index 7e5e775e97f400d8a050effc41f68a8261bd9a23..76c9688b6a0a75fc1c28e90920a8c9498c5e6d06 100644 (file)
@@ -6260,6 +6260,8 @@ void __paginginit zero_resv_unavail(void)
        pgcnt = 0;
        for_each_resv_unavail_range(i, &start, &end) {
                for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
+                       if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+                               continue;
                        mm_zero_struct_page(pfn_to_page(pfn));
                        pgcnt++;
                }
index 7a5dacaa06e3f277c8543c502dba0e3c6a6a1a13..2609aba121e89cc5c8b656f4ef3539545c434091 100644 (file)
@@ -211,7 +211,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
        if (unlikely(!mem_section)) {
                unsigned long size, align;
 
-               size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+               size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
                align = 1 << (INTERNODE_CACHE_SHIFT);
                mem_section = memblock_virt_alloc(size, align);
        }
index c02c850ea3490af95fde44f94bd199fbdc500684..47d5ced51f2d44cddc559814b42caa0ea9bf5863 100644 (file)
@@ -297,10 +297,13 @@ EXPORT_SYMBOL(register_shrinker);
  */
 void unregister_shrinker(struct shrinker *shrinker)
 {
+       if (!shrinker->nr_deferred)
+               return;
        down_write(&shrinker_rwsem);
        list_del(&shrinker->list);
        up_write(&shrinker_rwsem);
        kfree(shrinker->nr_deferred);
+       shrinker->nr_deferred = NULL;
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
index 685049a9048d8e9f8b53041cba43b96317b112fa..683c0651098c719ab25c5b48789262e541a13362 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/mount.h>
 #include <linux/migrate.h>
 #include <linux/pagemap.h>
+#include <linux/fs.h>
 
 #define ZSPAGE_MAGIC   0x58
 
index 8dfdd94e430fd57f7bf6d1da86f18658a057e05d..bad01b14a4ad6b5d4e61ac5e0ed93022755223ab 100644 (file)
@@ -111,12 +111,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
                vlan_gvrp_uninit_applicant(real_dev);
        }
 
-       /* Take it out of our own structures, but be sure to interlock with
-        * HW accelerating devices or SW vlan input packet processing if
-        * VLAN is not 0 (leave it there for 802.1p).
-        */
-       if (vlan_id)
-               vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
+       vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
        /* Get rid of the vlan's reference to real_dev */
        dev_put(real_dev);
index 43ba91c440bcd65bd9f24258a28d01492cf6ac13..fc6615d5916524c446a9f4f952f2f8b7b5b67e16 100644 (file)
@@ -3363,9 +3363,10 @@ static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data
                        break;
 
                case L2CAP_CONF_EFS:
-                       remote_efs = 1;
-                       if (olen == sizeof(efs))
+                       if (olen == sizeof(efs)) {
+                               remote_efs = 1;
                                memcpy(&efs, (void *) val, olen);
+                       }
                        break;
 
                case L2CAP_CONF_EWS:
@@ -3584,16 +3585,17 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
                        break;
 
                case L2CAP_CONF_EFS:
-                       if (olen == sizeof(efs))
+                       if (olen == sizeof(efs)) {
                                memcpy(&efs, (void *)val, olen);
 
-                       if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
-                           efs.stype != L2CAP_SERV_NOTRAFIC &&
-                           efs.stype != chan->local_stype)
-                               return -ECONNREFUSED;
+                               if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
+                                   efs.stype != L2CAP_SERV_NOTRAFIC &&
+                                   efs.stype != chan->local_stype)
+                                       return -ECONNREFUSED;
 
-                       l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
-                                          (unsigned long) &efs, endptr - ptr);
+                               l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
+                                                  (unsigned long) &efs, endptr - ptr);
+                       }
                        break;
 
                case L2CAP_CONF_FCS:
index d0ef0a8e8831920cb86fc767eb0d756bf89feb46..015f465c514b28564c9e91eec40dc041b765fe25 100644 (file)
@@ -1262,19 +1262,20 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
        struct net_bridge *br = netdev_priv(dev);
        int err;
 
+       err = register_netdevice(dev);
+       if (err)
+               return err;
+
        if (tb[IFLA_ADDRESS]) {
                spin_lock_bh(&br->lock);
                br_stp_change_bridge_id(br, nla_data(tb[IFLA_ADDRESS]));
                spin_unlock_bh(&br->lock);
        }
 
-       err = register_netdevice(dev);
-       if (err)
-               return err;
-
        err = br_changelink(dev, tb, data, extack);
        if (err)
-               unregister_netdevice(dev);
+               br_dev_delete(dev, NULL);
+
        return err;
 }
 
index 2d38b6e34203b7df5638c340126c2f5169fb3791..e0adcd123f48a1a4f66028bbf731132ed8e7ff17 100644 (file)
@@ -334,9 +334,8 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
        mutex_lock(&caifdevs->lock);
        list_add_rcu(&caifd->list, &caifdevs->list);
 
-       strncpy(caifd->layer.name, dev->name,
-               sizeof(caifd->layer.name) - 1);
-       caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+       strlcpy(caifd->layer.name, dev->name,
+               sizeof(caifd->layer.name));
        caifd->layer.transmit = transmit;
        cfcnfg_add_phy_layer(cfg,
                                dev,
index 5cd44f001f6479a5e5ec9d02cfc417b899b05e9d..1a082a946045e53f1617cc9a814bae5dcb9784e7 100644 (file)
@@ -176,9 +176,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
                dev_add_pack(&caif_usb_type);
        pack_added = true;
 
-       strncpy(layer->name, dev->name,
-                       sizeof(layer->name) - 1);
-       layer->name[sizeof(layer->name) - 1] = 0;
+       strlcpy(layer->name, dev->name, sizeof(layer->name));
 
        return 0;
 }
index 273cb07f57d87186224fb50943af949b99b3cde2..8f00bea093b942f8b50193def6d311d09e75b8eb 100644 (file)
@@ -268,17 +268,15 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
        case CAIFPROTO_RFM:
                l->linktype = CFCTRL_SRV_RFM;
                l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
-               strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
-                       sizeof(l->u.rfm.volume)-1);
-               l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+               strlcpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+                       sizeof(l->u.rfm.volume));
                break;
        case CAIFPROTO_UTIL:
                l->linktype = CFCTRL_SRV_UTIL;
                l->endpoint = 0x00;
                l->chtype = 0x00;
-               strncpy(l->u.utility.name, s->sockaddr.u.util.service,
-                       sizeof(l->u.utility.name)-1);
-               l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+               strlcpy(l->u.utility.name, s->sockaddr.u.util.service,
+                       sizeof(l->u.utility.name));
                caif_assert(sizeof(l->u.utility.name) > 10);
                l->u.utility.paramlen = s->param.size;
                if (l->u.utility.paramlen > sizeof(l->u.utility.params))
index f5afda1abc76fe908e8cce160ad032870972424c..655ed7032150309062d0c8c85cda571d34030423 100644 (file)
@@ -258,8 +258,8 @@ int cfctrl_linkup_request(struct cflayer *layer,
                tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
                cfpkt_add_body(pkt, &tmp16, 2);
                memset(utility_name, 0, sizeof(utility_name));
-               strncpy(utility_name, param->u.utility.name,
-                       UTILITY_NAME_LENGTH - 1);
+               strlcpy(utility_name, param->u.utility.name,
+                       UTILITY_NAME_LENGTH);
                cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
                tmp8 = param->u.utility.paramlen;
                cfpkt_add_body(pkt, &tmp8, 1);
index f47e96b623088ae354947787ef17217cd32ae64e..0e0ba36eeac9852b8df5ddd398dbc66ad6c83760 100644 (file)
@@ -1146,7 +1146,19 @@ EXPORT_SYMBOL(dev_alloc_name);
 int dev_get_valid_name(struct net *net, struct net_device *dev,
                       const char *name)
 {
-       return dev_alloc_name_ns(net, dev, name);
+       BUG_ON(!net);
+
+       if (!dev_valid_name(name))
+               return -EINVAL;
+
+       if (strchr(name, '%'))
+               return dev_alloc_name_ns(net, dev, name);
+       else if (__dev_get_by_name(net, name))
+               return -EEXIST;
+       else if (dev->name != name)
+               strlcpy(dev->name, name, IFNAMSIZ);
+
+       return 0;
 }
 EXPORT_SYMBOL(dev_get_valid_name);
 
@@ -3904,7 +3916,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
                                     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
                                     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
                        goto do_drop;
-               if (troom > 0 && __skb_linearize(skb))
+               if (skb_linearize(skb))
                        goto do_drop;
        }
 
index f8fcf450a36e604fe4c91dcccb4fd2a4a35ef1df..8225416911aed6d95f91ed5cb98ef1fb843101bb 100644 (file)
@@ -770,15 +770,6 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
        return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 }
 
-static void
-warn_incomplete_ethtool_legacy_settings_conversion(const char *details)
-{
-       char name[sizeof(current->comm)];
-
-       pr_info_once("warning: `%s' uses legacy ethtool link settings API, %s\n",
-                    get_task_comm(name, current), details);
-}
-
 /* Query device for its ethtool_cmd settings.
  *
  * Backward compatibility note: for compatibility with legacy ethtool,
@@ -805,10 +796,8 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
                                                           &link_ksettings);
                if (err < 0)
                        return err;
-               if (!convert_link_ksettings_to_legacy_settings(&cmd,
-                                                              &link_ksettings))
-                       warn_incomplete_ethtool_legacy_settings_conversion(
-                               "link modes are only partially reported");
+               convert_link_ksettings_to_legacy_settings(&cmd,
+                                                         &link_ksettings);
 
                /* send a sensible cmd tag back to user */
                cmd.cmd = ETHTOOL_GSET;
index 6a85e67fafce224b534dd87bb9407ae115f8ba7a..d339ef170df60282c8812b63148c479820b3008f 100644 (file)
@@ -1054,11 +1054,9 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
                 */
                goto out_err_free;
 
-       /* We are guaranteed to never error here with cBPF to eBPF
-        * transitions, since there's no issue with type compatibility
-        * checks on program arrays.
-        */
        fp = bpf_prog_select_runtime(fp, &err);
+       if (err)
+               goto out_err_free;
 
        kfree(old_prog);
        return fp;
index b797832565d34ccefb374ee87f9cbc460779a484..60a71be75aea063b418a48ade2a1e1c7804ab35c 100644 (file)
@@ -267,7 +267,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
        spin_lock_bh(&net->nsid_lock);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
-               get_net(peer);
+               peer = maybe_get_net(peer);
        spin_unlock_bh(&net->nsid_lock);
        rcu_read_unlock();
 
index dabba2a91fc8ff5852681dcee53149d99798ed79..778d7f03404a6631607f29c3267a0e39a40ac57c 100644 (file)
@@ -1681,18 +1681,18 @@ static bool link_dump_filtered(struct net_device *dev,
        return false;
 }
 
-static struct net *get_target_net(struct sk_buff *skb, int netnsid)
+static struct net *get_target_net(struct sock *sk, int netnsid)
 {
        struct net *net;
 
-       net = get_net_ns_by_id(sock_net(skb->sk), netnsid);
+       net = get_net_ns_by_id(sock_net(sk), netnsid);
        if (!net)
                return ERR_PTR(-EINVAL);
 
        /* For now, the caller is required to have CAP_NET_ADMIN in
         * the user namespace owning the target net ns.
         */
-       if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+       if (!sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN)) {
                put_net(net);
                return ERR_PTR(-EACCES);
        }
@@ -1733,7 +1733,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
                        ifla_policy, NULL) >= 0) {
                if (tb[IFLA_IF_NETNSID]) {
                        netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-                       tgt_net = get_target_net(skb, netnsid);
+                       tgt_net = get_target_net(skb->sk, netnsid);
                        if (IS_ERR(tgt_net)) {
                                tgt_net = net;
                                netnsid = -1;
@@ -2883,7 +2883,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 
        if (tb[IFLA_IF_NETNSID]) {
                netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
-               tgt_net = get_target_net(skb, netnsid);
+               tgt_net = get_target_net(NETLINK_CB(skb).sk, netnsid);
                if (IS_ERR(tgt_net))
                        return PTR_ERR(tgt_net);
        }
index a592ca025fc46bf4ff26c900d273b1ae12afa334..08f57408131523adec3cc36044952e14aa5a6b9a 100644 (file)
@@ -1177,12 +1177,12 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        int i, new_frags;
        u32 d_off;
 
-       if (!num_frags)
-               return 0;
-
        if (skb_shared(skb) || skb_unclone(skb, gfp_mask))
                return -EINVAL;
 
+       if (!num_frags)
+               goto release;
+
        new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT;
        for (i = 0; i < new_frags; i++) {
                page = alloc_page(gfp_mask);
@@ -1238,6 +1238,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
        __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
        skb_shinfo(skb)->nr_frags = new_frags;
 
+release:
        skb_zcopy_clear(skb, false);
        return 0;
 }
@@ -3654,8 +3655,6 @@ normal:
 
                skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
                                              SKBTX_SHARED_FRAG;
-               if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC))
-                       goto err;
 
                while (pos < offset + len) {
                        if (i >= nfrags) {
@@ -3681,6 +3680,8 @@ normal:
 
                        if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
                                goto err;
+                       if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+                               goto err;
 
                        *nskb_frag = *frag;
                        __skb_frag_ref(nskb_frag);
index 217f4e3b82f6edca841b66089ab0772bd8d391e7..146b50e30659daca3bdc4413d800890ef482f521 100644 (file)
@@ -288,7 +288,7 @@ static int sock_diag_bind(struct net *net, int group)
        case SKNLGRP_INET6_UDP_DESTROY:
                if (!sock_diag_handlers[AF_INET6])
                        request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-                                      NETLINK_SOCK_DIAG, AF_INET);
+                                      NETLINK_SOCK_DIAG, AF_INET6);
                break;
        }
        return 0;
index cbc3dde4cfcccae89ba262bb032edbf13639321f..a47ad6cd41c0396558ee6666ae45e78f36d64bd5 100644 (file)
@@ -325,7 +325,13 @@ static struct ctl_table net_core_table[] = {
                .data           = &bpf_jit_enable,
                .maxlen         = sizeof(int),
                .mode           = 0644,
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
                .proc_handler   = proc_dointvec
+#else
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &one,
+               .extra2         = &one,
+#endif
        },
 # ifdef CONFIG_HAVE_EBPF_JIT
        {
index f52d27a422c37298b2ad0c1dbba0e5307f1a46b6..08259d078b1ca821c581aeb34251c79a9aba8c8d 100644 (file)
@@ -1298,14 +1298,19 @@ err_table_hash_alloc:
 
 static void ip_fib_net_exit(struct net *net)
 {
-       unsigned int i;
+       int i;
 
        rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
        RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
-       for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
+       /* Destroy the tables in reverse order to guarantee that the
+        * local table, ID 255, is destroyed before the main table, ID
+        * 254. This is necessary as the local table may contain
+        * references to data contained in the main table.
+        */
+       for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) {
                struct hlist_head *head = &net->ipv4.fib_table_hash[i];
                struct hlist_node *tmp;
                struct fib_table *tb;
index f04d944f8abe0bfbb840837bb35d28fe6d8d25d0..c586597da20dbb0e46eb0f693fd65bccfc8f3633 100644 (file)
@@ -698,7 +698,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
 
        nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
                int type = nla_type(nla);
-               u32 val;
+               u32 fi_val, val;
 
                if (!type)
                        continue;
@@ -715,7 +715,11 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
                        val = nla_get_u32(nla);
                }
 
-               if (fi->fib_metrics->metrics[type - 1] != val)
+               fi_val = fi->fib_metrics->metrics[type - 1];
+               if (type == RTAX_FEATURES)
+                       fi_val &= ~DST_FEATURE_ECN_CA;
+
+               if (fi_val != val)
                        return false;
        }
 
index 9c1735632c8c43cc0607d54403571362895d91e2..45ffd3d045d240cad8e4d0ed8dd0dd7da997bf9e 100644 (file)
@@ -1310,6 +1310,7 @@ static const struct net_device_ops erspan_netdev_ops = {
 static void ipgre_tap_setup(struct net_device *dev)
 {
        ether_setup(dev);
+       dev->max_mtu = 0;
        dev->netdev_ops = &gre_tap_netdev_ops;
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
index 125c1eab3eaa6d894804c3aa8918aa7fcc736ca0..5e570aa9e43b77f5c9ccd267319f762200f732d2 100644 (file)
@@ -520,9 +520,11 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                goto out;
 
        /* hdrincl should be READ_ONCE(inet->hdrincl)
-        * but READ_ONCE() doesn't work with bit fields
+        * but READ_ONCE() doesn't work with bit fields.
+        * Doing this indirectly yields the same result.
         */
        hdrincl = inet->hdrincl;
+       hdrincl = READ_ONCE(hdrincl);
        /*
         *      Check the flags.
         */
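The two-step copy above is the usual workaround for READ_ONCE() on a
bitfield: widen the field into a plain int, then pin that load so the
compiler cannot fold or repeat the access. The bare pattern, with a
hypothetical field:

	int flag = s->flag;       /* bitfield widened into a full int */
	flag = READ_ONCE(flag);   /* forbid a later reload of s->flag */
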
index e50b7fea57ee35c117002463f473ccd41face358..bcfc00e88756dabb1f491d3d41137ccbc7ab1cbc 100644 (file)
@@ -23,6 +23,12 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
        return xfrm4_extract_header(skb);
 }
 
+static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       return dst_input(skb);
+}
+
 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
                                         struct sk_buff *skb)
 {
@@ -33,7 +39,11 @@ static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
                                         iph->tos, skb->dev))
                        goto drop;
        }
-       return dst_input(skb);
+
+       if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2))
+               goto drop;
+
+       return 0;
 drop:
        kfree_skb(skb);
        return NET_RX_DROP;
index c26f71234b9c01a82ec9d40423ee28957468ed46..c9441ca4539936486291147a47a84ef1b2ecf095 100644 (file)
@@ -210,7 +210,6 @@ lookup_protocol:
        np->mcast_hops  = IPV6_DEFAULT_MCASTHOPS;
        np->mc_loop     = 1;
        np->pmtudisc    = IPV6_PMTUDISC_WANT;
-       np->autoflowlabel = ip6_default_np_autolabel(net);
        np->repflow     = net->ipv6.sysctl.flowlabel_reflect;
        sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
 
index 83bd75713535cf8fadf348e5d452445aba9c426c..bc68eb661970a5404e6cfb060b0011680d104ed5 100644 (file)
@@ -925,6 +925,15 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
        sr_phdr->segments[0] = **addr_p;
        *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left];
 
+       if (sr_ihdr->hdrlen > hops * 2) {
+               int tlvs_offset, tlvs_length;
+
+               tlvs_offset = (1 + hops * 2) << 3;
+               tlvs_length = (sr_ihdr->hdrlen - hops * 2) << 3;
+               memcpy((char *)sr_phdr + tlvs_offset,
+                      (char *)sr_ihdr + tlvs_offset, tlvs_length);
+       }
+
 #ifdef CONFIG_IPV6_SEG6_HMAC
        if (sr_has_hmac(sr_phdr)) {
                struct net *net = NULL;
index f5285f4e1d08acb60d42fb6fd10a0c38a239324f..9dcc3924a97561d689a1fd8862742d9543339dd2 100644 (file)
@@ -640,6 +640,11 @@ static struct fib6_node *fib6_add_1(struct net *net,
                        if (!(fn->fn_flags & RTN_RTINFO)) {
                                RCU_INIT_POINTER(fn->leaf, NULL);
                                rt6_release(leaf);
+                       /* remove null_entry in the root node */
+                       } else if (fn->fn_flags & RTN_TL_ROOT &&
+                                  rcu_access_pointer(fn->leaf) ==
+                                  net->ipv6.ip6_null_entry) {
+                               RCU_INIT_POINTER(fn->leaf, NULL);
                        }
 
                        return fn;
@@ -1241,23 +1246,28 @@ out:
                 * If fib6_add_1 has cleared the old leaf pointer in the
                 * super-tree leaf node we have to find a new one for it.
                 */
-               struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
-                                           lockdep_is_held(&table->tb6_lock));
-               if (pn != fn && pn_leaf == rt) {
-                       pn_leaf = NULL;
-                       RCU_INIT_POINTER(pn->leaf, NULL);
-                       atomic_dec(&rt->rt6i_ref);
-               }
-               if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
-                       pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
-                       if (!pn_leaf) {
-                               WARN_ON(!pn_leaf);
-                               pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+               if (pn != fn) {
+                       struct rt6_info *pn_leaf =
+                               rcu_dereference_protected(pn->leaf,
+                                   lockdep_is_held(&table->tb6_lock));
+                       if (pn_leaf == rt) {
+                               pn_leaf = NULL;
+                               RCU_INIT_POINTER(pn->leaf, NULL);
+                               atomic_dec(&rt->rt6i_ref);
                        }
+                       if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+                               pn_leaf = fib6_find_prefix(info->nl_net, table,
+                                                          pn);
+#if RT6_DEBUG >= 2
+                               if (!pn_leaf) {
+                                       WARN_ON(!pn_leaf);
+                                       pn_leaf =
+                                           info->nl_net->ipv6.ip6_null_entry;
+                               }
 #endif
-                       atomic_inc(&pn_leaf->rt6i_ref);
-                       rcu_assign_pointer(pn->leaf, pn_leaf);
+                               atomic_inc(&pn_leaf->rt6i_ref);
+                               rcu_assign_pointer(pn->leaf, pn_leaf);
+                       }
                }
 #endif
                goto failure;
@@ -1265,13 +1275,17 @@ out:
        return err;
 
 failure:
-       /* fn->leaf could be NULL if fn is an intermediate node and we
-        * failed to add the new route to it in both subtree creation
-        * failure and fib6_add_rt2node() failure case.
-        * In both cases, fib6_repair_tree() should be called to fix
-        * fn->leaf.
+       /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
+        * 1. fn is an intermediate node and we failed to add the new
+        * route to it in both subtree creation failure and fib6_add_rt2node()
+        * failure case.
+        * 2. fn is the root node in the table and we fail to add the first
+        * default route to it.
         */
-       if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+       if (fn &&
+           (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
+            (fn->fn_flags & RTN_TL_ROOT &&
+             !rcu_access_pointer(fn->leaf))))
                fib6_repair_tree(info->nl_net, table, fn);
        /* Always release dst as dst->__refcnt is guaranteed
         * to be taken before entering this function
@@ -1526,6 +1540,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
        struct fib6_walker *w;
        int iter = 0;
 
+       /* Set fn->leaf to null_entry for root node. */
+       if (fn->fn_flags & RTN_TL_ROOT) {
+               rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
+               return fn;
+       }
+
        for (;;) {
                struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
                                            lockdep_is_held(&table->tb6_lock));
@@ -1680,10 +1700,15 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
        }
        read_unlock(&net->ipv6.fib6_walker_lock);
 
-       /* If it was last route, expunge its radix tree node */
+       /* If it was last route, call fib6_repair_tree() to:
+        * 1. For root node, put back null_entry as how the table was created.
+        * 2. For other nodes, expunge its radix tree node.
+        */
        if (!rcu_access_pointer(fn->leaf)) {
-               fn->fn_flags &= ~RTN_RTINFO;
-               net->ipv6.rt6_stats->fib_route_nodes--;
+               if (!(fn->fn_flags & RTN_TL_ROOT)) {
+                       fn->fn_flags &= ~RTN_RTINFO;
+                       net->ipv6.rt6_stats->fib_route_nodes--;
+               }
                fn = fib6_repair_tree(net, table, fn);
        }
 
index 4cfd8e0696fe77f6d7af7ca3579a2418aef972f6..772695960890893f9ab7862cf64728b783f5bb96 100644 (file)
@@ -1014,6 +1014,36 @@ static void ip6gre_tunnel_setup(struct net_device *dev)
        eth_random_addr(dev->perm_addr);
 }
 
+#define GRE6_FEATURES (NETIF_F_SG |            \
+                      NETIF_F_FRAGLIST |       \
+                      NETIF_F_HIGHDMA |        \
+                      NETIF_F_HW_CSUM)
+
+static void ip6gre_tnl_init_features(struct net_device *dev)
+{
+       struct ip6_tnl *nt = netdev_priv(dev);
+
+       dev->features           |= GRE6_FEATURES;
+       dev->hw_features        |= GRE6_FEATURES;
+
+       if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
+               /* TCP offload with GRE SEQ is not supported, nor
+                * can we support 2 levels of outer headers requiring
+                * an update.
+                */
+               if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
+                   nt->encap.type == TUNNEL_ENCAP_NONE) {
+                       dev->features    |= NETIF_F_GSO_SOFTWARE;
+                       dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+               }
+
+               /* Can use a lockless transmit, unless we generate
+                * output sequences
+                */
+               dev->features |= NETIF_F_LLTX;
+       }
+}
+
 static int ip6gre_tunnel_init_common(struct net_device *dev)
 {
        struct ip6_tnl *tunnel;
@@ -1048,6 +1078,8 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
        if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
                dev->mtu -= 8;
 
+       ip6gre_tnl_init_features(dev);
+
        return 0;
 }
 
@@ -1298,16 +1330,12 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
        .ndo_get_iflink = ip6_tnl_get_iflink,
 };
 
-#define GRE6_FEATURES (NETIF_F_SG |            \
-                      NETIF_F_FRAGLIST |       \
-                      NETIF_F_HIGHDMA |                \
-                      NETIF_F_HW_CSUM)
-
 static void ip6gre_tap_setup(struct net_device *dev)
 {
 
        ether_setup(dev);
 
+       dev->max_mtu = 0;
        dev->netdev_ops = &ip6gre_tap_netdev_ops;
        dev->needs_free_netdev = true;
        dev->priv_destructor = ip6gre_dev_free;
@@ -1382,26 +1410,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
        nt->net = dev_net(dev);
        ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]);
 
-       dev->features           |= GRE6_FEATURES;
-       dev->hw_features        |= GRE6_FEATURES;
-
-       if (!(nt->parms.o_flags & TUNNEL_SEQ)) {
-               /* TCP offload with GRE SEQ is not supported, nor
-                * can we support 2 levels of outer headers requiring
-                * an update.
-                */
-               if (!(nt->parms.o_flags & TUNNEL_CSUM) ||
-                   (nt->encap.type == TUNNEL_ENCAP_NONE)) {
-                       dev->features    |= NETIF_F_GSO_SOFTWARE;
-                       dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-               }
-
-               /* Can use a lockless transmit, unless we generate
-                * output sequences
-                */
-               dev->features |= NETIF_F_LLTX;
-       }
-
        err = register_netdevice(dev);
        if (err)
                goto out;
index 5110a418cc4d0c1040506394460cb482698d8c15..688ba5f7516b37c87b879036dce781bdcfa01739 100644 (file)
@@ -166,6 +166,14 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+{
+       if (!np->autoflowlabel_set)
+               return ip6_default_np_autolabel(net);
+       else
+               return np->autoflowlabel;
+}
+
 /*
  * xmit an sk_buff (used by TCP, SCTP and DCCP)
  * Note : socket lock is not held for SYNACK packets, but might be modified
@@ -230,7 +238,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                hlimit = ip6_dst_hoplimit(dst);
 
        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                                    np->autoflowlabel, fl6));
+                               ip6_autoflowlabel(net, np), fl6));
 
        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
@@ -1626,7 +1634,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
        ip6_flow_hdr(hdr, v6_cork->tclass,
                     ip6_make_flowlabel(net, skb, fl6->flowlabel,
-                                       np->autoflowlabel, fl6));
+                                       ip6_autoflowlabel(net, np), fl6));
        hdr->hop_limit = v6_cork->hop_limit;
        hdr->nexthdr = proto;
        hdr->saddr = fl6->saddr;
@@ -1727,9 +1735,10 @@ struct sk_buff *ip6_make_skb(struct sock *sk,
        cork.base.opt = NULL;
        v6_cork.opt = NULL;
        err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
-       if (err)
+       if (err) {
+               ip6_cork_release(&cork, &v6_cork);
                return ERR_PTR(err);
-
+       }
        if (ipc6->dontfrag < 0)
                ipc6->dontfrag = inet6_sk(sk)->dontfrag;
 
index db84f523656ddf876e1971c416ee03a6a1794d9d..9a7cf355bc8c8ff67388456060c1f7e67d8762ee 100644 (file)
@@ -1074,10 +1074,11 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
                        memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
                        neigh_release(neigh);
                }
-       } else if (!(t->parms.flags &
-                    (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
-               /* enable the cache only only if the routing decision does
-                * not depend on the current inner header value
+       } else if (t->parms.proto != 0 && !(t->parms.flags &
+                                           (IP6_TNL_F_USE_ORIG_TCLASS |
+                                            IP6_TNL_F_USE_ORIG_FWMARK))) {
+               /* enable the cache only if neither the outer protocol nor the
+                * routing decision depends on the current inner header value
                 */
                use_cache = true;
        }
@@ -1123,8 +1124,13 @@ route_lookup:
                max_headroom += 8;
                mtu -= 8;
        }
-       if (mtu < IPV6_MIN_MTU)
-               mtu = IPV6_MIN_MTU;
+       if (skb->protocol == htons(ETH_P_IPV6)) {
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+       } else if (mtu < 576) {
+               mtu = 576;
+       }
+
        if (skb_dst(skb) && !t->parms.collect_md)
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
        if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
@@ -1671,11 +1677,11 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
 {
        struct ip6_tnl *tnl = netdev_priv(dev);
 
-       if (tnl->parms.proto == IPPROTO_IPIP) {
-               if (new_mtu < ETH_MIN_MTU)
+       if (tnl->parms.proto == IPPROTO_IPV6) {
+               if (new_mtu < IPV6_MIN_MTU)
                        return -EINVAL;
        } else {
-               if (new_mtu < IPV6_MIN_MTU)
+               if (new_mtu < ETH_MIN_MTU)
                        return -EINVAL;
        }
        if (new_mtu > 0xFFF8 - dev->hard_header_len)
index b9404feabd7857fe0873fbc4f346d281f3600807..2d4680e0376f41deee6c999eadaf9409353e0b4a 100644 (file)
@@ -886,6 +886,7 @@ pref_skip_coa:
                break;
        case IPV6_AUTOFLOWLABEL:
                np->autoflowlabel = valbool;
+               np->autoflowlabel_set = 1;
                retv = 0;
                break;
        case IPV6_RECVFRAGSIZE:
index 7a8d1500d374b4089e623ed2b20d68110cff498e..0458b761f3c56ce765841e0a3a7e5e78f90b95eb 100644 (file)
@@ -2336,6 +2336,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
        }
 
        rt->dst.flags |= DST_HOST;
+       rt->dst.input = ip6_input;
        rt->dst.output  = ip6_output;
        rt->rt6i_gateway  = fl6->daddr;
        rt->rt6i_dst.addr = fl6->daddr;
@@ -4297,19 +4298,13 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                if (!ipv6_addr_any(&fl6.saddr))
                        flags |= RT6_LOOKUP_F_HAS_SADDR;
 
-               if (!fibmatch)
-                       dst = ip6_route_input_lookup(net, dev, &fl6, flags);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 
                rcu_read_unlock();
        } else {
                fl6.flowi6_oif = oif;
 
-               if (!fibmatch)
-                       dst = ip6_route_output(net, NULL, &fl6);
-               else
-                       dst = ip6_route_lookup(net, &fl6, 0);
+               dst = ip6_route_output(net, NULL, &fl6);
        }
 
 
@@ -4326,6 +4321,15 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
                goto errout;
        }
 
+       if (fibmatch && rt->dst.from) {
+               struct rt6_info *ort = container_of(rt->dst.from,
+                                                   struct rt6_info, dst);
+
+               dst_hold(&ort->dst);
+               ip6_rt_put(rt);
+               rt = ort;
+       }
+
        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb) {
                ip6_rt_put(rt);
index fe04e23af9862557fde2a9c214faf3a10b7e5eda..841f4a07438e83502eadd6ec6c16a16d1de6aa55 100644 (file)
@@ -32,6 +32,14 @@ int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 }
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
+static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
+                                  struct sk_buff *skb)
+{
+       if (xfrm_trans_queue(skb, ip6_rcv_finish))
+               __kfree_skb(skb);
+       return -1;
+}
+
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
 {
        struct xfrm_offload *xo = xfrm_offload(skb);
@@ -56,7 +64,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 
        NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
                dev_net(skb->dev), NULL, skb, skb->dev, NULL,
-               ip6_rcv_finish);
+               xfrm6_transport_finish2);
        return -1;
 }
 
index 70e9d2ca8bbec176edac094268247bb125e3cbdc..4daafb07602f5f9727af7bec4ea7603901438b63 100644 (file)
@@ -3632,6 +3632,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
                }
                return true;
        case NL80211_IFTYPE_MESH_POINT:
+               if (ether_addr_equal(sdata->vif.addr, hdr->addr2))
+                       return false;
                if (multicast)
                        return true;
                return ether_addr_equal(sdata->vif.addr, hdr->addr1);
index 10798b35748180746266aa0a84187dfd7f0f1ceb..07bd4138c84ef250decc00a5c70df84298124a3a 100644 (file)
@@ -2072,7 +2072,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
                                continue;
 
                        list_for_each_entry_rcu(chain, &table->chains, list) {
-                               if (ctx && ctx->chain[0] &&
+                               if (ctx && ctx->chain &&
                                    strcmp(ctx->chain, chain->name) != 0)
                                        continue;
 
@@ -4665,8 +4665,10 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
 {
        struct nft_obj_filter *filter = cb->data;
 
-       kfree(filter->table);
-       kfree(filter);
+       if (filter) {
+               kfree(filter->table);
+               kfree(filter);
+       }
 
        return 0;
 }
index 1f7fbd3c7e5a6de0cbe7e0bb5c847a98df371aff..06b090d8e9014d6bf6adfb742b5c2620197b98fb 100644 (file)
@@ -55,21 +55,11 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret)
 
 static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
 {
-       mm_segment_t oldfs = get_fs();
-       int retval, fd;
-
        if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX)
                return -EINVAL;
 
-       set_fs(KERNEL_DS);
-       fd = bpf_obj_get_user(path, 0);
-       set_fs(oldfs);
-       if (fd < 0)
-               return fd;
-
-       retval = __bpf_mt_check_fd(fd, ret);
-       sys_close(fd);
-       return retval;
+       *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER);
+       return PTR_ERR_OR_ZERO(*ret);
 }
 
 static int bpf_mt_check(const struct xt_mtchk_param *par)
index dbe2379329c5517fb164b6024d40fabebe7855c8..f039064ce922f3aac8419dcda65ad875f89e966b 100644 (file)
@@ -579,6 +579,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                        return -EINVAL;
 
                skb_reset_network_header(skb);
+               key->eth.type = skb->protocol;
        } else {
                eth = eth_hdr(skb);
                ether_addr_copy(key->eth.src, eth->h_source);
@@ -592,15 +593,23 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
                if (unlikely(parse_vlan(skb, key)))
                        return -ENOMEM;
 
-               skb->protocol = parse_ethertype(skb);
-               if (unlikely(skb->protocol == htons(0)))
+               key->eth.type = parse_ethertype(skb);
+               if (unlikely(key->eth.type == htons(0)))
                        return -ENOMEM;
 
+               /* Multiply-tagged packets need to retain the TPID to satisfy
+                * skb_vlan_pop(), which will later shift the ethertype into
+                * skb->protocol.
+                */
+               if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
+                       skb->protocol = key->eth.cvlan.tpid;
+               else
+                       skb->protocol = key->eth.type;
+
                skb_reset_network_header(skb);
                __skb_push(skb, skb->data - skb_mac_header(skb));
        }
        skb_reset_mac_len(skb);
-       key->eth.type = skb->protocol;
 
        /* Network layer. */
        if (key->eth.type == htons(ETH_P_IP)) {
index bc2f1e0977d657ec09f176cfcecf28839eb1fab0..634cfcb7bba6833bde376706947c99f1cb103199 100644 (file)
@@ -525,6 +525,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args)
 
        local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
+       if (args->nr_local == 0)
+               return -EINVAL;
+
        /* figure out the number of pages in the vector */
        for (i = 0; i < args->nr_local; i++) {
                if (copy_from_user(&vec, &local_vec[i],
@@ -874,6 +877,7 @@ int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
 err:
        if (page)
                put_page(page);
+       rm->atomic.op_active = 0;
        kfree(rm->atomic.op_notifier);
 
        return ret;
index b52cdc8ae428819a5853509a06852ec2ebc43a5a..f72466c63f0c5657a2f44e11ae432dd1457991cf 100644 (file)
@@ -1009,6 +1009,9 @@ static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
                        continue;
 
                if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+                       if (cmsg->cmsg_len <
+                           CMSG_LEN(sizeof(struct rds_rdma_args)))
+                               return -EINVAL;
                        args = CMSG_DATA(cmsg);
                        *rdma_bytes += args->remote_vec.bytes;
                }
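
CMSG_DATA() simply points past the cmsghdr, so a control message shorter than CMSG_LEN(sizeof(struct rds_rdma_args)) would have let the kernel read past the user-supplied control buffer. A minimal sketch of the guard, using a hypothetical helper name:

/* Hypothetical helper: only trust CMSG_DATA() once cmsg_len shows the
 * payload really holds 'need' bytes. */
static const void *cmsg_payload(const struct cmsghdr *cmsg, size_t need)
{
        if (cmsg->cmsg_len < CMSG_LEN(need))
                return NULL;            /* truncated control message */
        return CMSG_DATA(cmsg);
}
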
index e29a48ef7fc348aefdc720cf08d1509ac5b53559..a0ac42b3ed0652fe897d59bd043c189af4fa0a12 100644 (file)
@@ -159,7 +159,7 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
        if (action == TC_ACT_SHOT)
                this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
-       tm->lastuse = lastuse;
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a,
index 8b3e593884803370ea27ef5fe770fa5ae829a888..08b61849c2a2f448fa0a29b0c93037ebbbafd0e7 100644 (file)
@@ -239,7 +239,7 @@ static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
        struct tcf_t *tm = &m->tcf_tm;
 
        _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
-       tm->lastuse = lastuse;
+       tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
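
The act_gact and act_mirred hunks make the same fix: hardware-offload stats can report a lastuse that races with, and falls behind, software updates, so a blind store could move the timestamp backwards. max_t() takes the maximum after casting both operands to the named type; its shape is roughly this (the in-tree macro adds stricter type checking):

#define max_t(type, x, y) ({                    \
        type __x = (x);                         \
        type __y = (y);                         \
        __x > __y ? __x : __y;                  \
})
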
index b91ea03e3afa717225c00a3d2a03e9d722229fbc..b9d63d2246e667329c30606165dd485b9dc777aa 100644 (file)
@@ -379,6 +379,8 @@ void tcf_block_put(struct tcf_block *block)
 {
        struct tcf_block_ext_info ei = {0, };
 
+       if (!block)
+               return;
        tcf_block_put_ext(block, block->q, &ei);
 }
 
index 6fe798c2df1a5303cd61cd3ad53cd2f9385d16de..8d78e7f4ecc33082517aaab5767a30c119f49dc0 100644 (file)
@@ -42,7 +42,6 @@ struct cls_bpf_prog {
        struct list_head link;
        struct tcf_result res;
        bool exts_integrated;
-       bool offloaded;
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
@@ -148,33 +147,37 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 }
 
 static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-                              enum tc_clsbpf_command cmd)
+                              struct cls_bpf_prog *oldprog)
 {
-       bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
        struct tcf_block *block = tp->chain->block;
-       bool skip_sw = tc_skip_sw(prog->gen_flags);
        struct tc_cls_bpf_offload cls_bpf = {};
+       struct cls_bpf_prog *obj;
+       bool skip_sw;
        int err;
 
+       skip_sw = prog && tc_skip_sw(prog->gen_flags);
+       obj = prog ?: oldprog;
+
        tc_cls_common_offload_init(&cls_bpf.common, tp);
-       cls_bpf.command = cmd;
-       cls_bpf.exts = &prog->exts;
-       cls_bpf.prog = prog->filter;
-       cls_bpf.name = prog->bpf_name;
-       cls_bpf.exts_integrated = prog->exts_integrated;
-       cls_bpf.gen_flags = prog->gen_flags;
+       cls_bpf.command = TC_CLSBPF_OFFLOAD;
+       cls_bpf.exts = &obj->exts;
+       cls_bpf.prog = prog ? prog->filter : NULL;
+       cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
+       cls_bpf.name = obj->bpf_name;
+       cls_bpf.exts_integrated = obj->exts_integrated;
+       cls_bpf.gen_flags = obj->gen_flags;
 
        err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-       if (addorrep) {
+       if (prog) {
                if (err < 0) {
-                       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+                       cls_bpf_offload_cmd(tp, oldprog, prog);
                        return err;
                } else if (err > 0) {
                        prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
                }
        }
 
-       if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
+       if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
                return -EINVAL;
 
        return 0;
@@ -183,38 +186,17 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
                           struct cls_bpf_prog *oldprog)
 {
-       struct cls_bpf_prog *obj = prog;
-       enum tc_clsbpf_command cmd;
-       bool skip_sw;
-       int ret;
-
-       skip_sw = tc_skip_sw(prog->gen_flags) ||
-               (oldprog && tc_skip_sw(oldprog->gen_flags));
-
-       if (oldprog && oldprog->offloaded) {
-               if (!tc_skip_hw(prog->gen_flags)) {
-                       cmd = TC_CLSBPF_REPLACE;
-               } else if (!tc_skip_sw(prog->gen_flags)) {
-                       obj = oldprog;
-                       cmd = TC_CLSBPF_DESTROY;
-               } else {
-                       return -EINVAL;
-               }
-       } else {
-               if (tc_skip_hw(prog->gen_flags))
-                       return skip_sw ? -EINVAL : 0;
-               cmd = TC_CLSBPF_ADD;
-       }
-
-       ret = cls_bpf_offload_cmd(tp, obj, cmd);
-       if (ret)
-               return ret;
+       if (prog && oldprog && prog->gen_flags != oldprog->gen_flags)
+               return -EINVAL;
 
-       obj->offloaded = true;
-       if (oldprog)
-               oldprog->offloaded = false;
+       if (prog && tc_skip_hw(prog->gen_flags))
+               prog = NULL;
+       if (oldprog && tc_skip_hw(oldprog->gen_flags))
+               oldprog = NULL;
+       if (!prog && !oldprog)
+               return 0;
 
-       return 0;
+       return cls_bpf_offload_cmd(tp, prog, oldprog);
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -222,25 +204,26 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 {
        int err;
 
-       if (!prog->offloaded)
-               return;
-
-       err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
-       if (err) {
+       err = cls_bpf_offload_cmd(tp, NULL, prog);
+       if (err)
                pr_err("Stopping hardware offload failed: %d\n", err);
-               return;
-       }
-
-       prog->offloaded = false;
 }
 
 static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
                                         struct cls_bpf_prog *prog)
 {
-       if (!prog->offloaded)
-               return;
+       struct tcf_block *block = tp->chain->block;
+       struct tc_cls_bpf_offload cls_bpf = {};
+
+       tc_cls_common_offload_init(&cls_bpf.common, tp);
+       cls_bpf.command = TC_CLSBPF_STATS;
+       cls_bpf.exts = &prog->exts;
+       cls_bpf.prog = prog->filter;
+       cls_bpf.name = prog->bpf_name;
+       cls_bpf.exts_integrated = prog->exts_integrated;
+       cls_bpf.gen_flags = prog->gen_flags;
 
-       cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+       tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
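
The refactor above collapses the ADD/REPLACE/DESTROY command enum into a single TC_CLSBPF_OFFLOAD call whose meaning is carried by which program pointers are set, letting drivers see a replace atomically instead of as destroy-plus-add. The resulting calling convention, summarized:

/* cls_bpf_offload_cmd(tp, prog, oldprog) after this change:
 *
 *   prog   oldprog   meaning
 *   set    NULL      install a new program
 *   set    set       replace oldprog with prog in one callback
 *   NULL   set       remove oldprog
 *   NULL   NULL      filtered out by cls_bpf_offload(), never issued
 */
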
index cd1b200acae7415d5e26c8aa3dbfed602f5796b2..661c7144b53af048b3a65484777910e2d60f25aa 100644 (file)
@@ -1040,6 +1040,8 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 
        if (!tp_head) {
                RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
+               /* Wait for any in-flight RCU callback before the old miniq is freed. */
+               rcu_barrier_bh();
                return;
        }
 
@@ -1055,7 +1057,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
        rcu_assign_pointer(*miniqp->p_miniq, miniq);
 
        if (miniq_old)
-               /* This is counterpart of the rcu barrier above. We need to
+               /* This is the counterpart of the rcu barriers above. We need to
                 * block potential new user of miniq_old until all readers
                 * are not seeing it.
                 */
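
The fix matters because a grace period alone is not enough here: rcu_barrier_bh() waits until every already-queued call_rcu_bh() callback has actually run, whereas synchronize_rcu() only waits out readers. A sketch of the teardown ordering this hunk enforces:

/* 1. Unpublish the pointer so no new reader can find the object.
 * 2. Wait out callbacks queued by earlier swaps.
 * 3. Only then may the caller free the backing memory. */
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
rcu_barrier_bh();       /* drains pending call_rcu_bh() callbacks */
/* caller is now free to release the mini_Qdisc pair */
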
index 3f619fdcbf0a0b4a6f35ece8021c011f874a2d79..291c97b07058218635fcfcd06214aa79d74ec80d 100644 (file)
@@ -78,6 +78,9 @@ const char *sctp_cname(const union sctp_subtype cid)
        case SCTP_CID_AUTH:
                return "AUTH";
 
+       case SCTP_CID_RECONF:
+               return "RECONF";
+
        default:
                break;
        }
index 621b5ca3fd1c17c3d7ef7bb1c7677ab98cebbe77..141c9c466ec172ff67930cf38eb02a49e01a12ab 100644 (file)
@@ -399,20 +399,24 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
                return;
        }
 
-       if (t->param_flags & SPP_PMTUD_ENABLE) {
-               /* Update transports view of the MTU */
-               sctp_transport_update_pmtu(t, pmtu);
-
-               /* Update association pmtu. */
-               sctp_assoc_sync_pmtu(asoc);
-       }
+       if (!(t->param_flags & SPP_PMTUD_ENABLE))
+               /* We can't allow retransmitting in this case, as the
+                * retransmission would be sized just as before, and thus
+                * we would get another ICMP and retransmit again.
+                */
+               return;
 
-       /* Retransmit with the new pmtu setting.
-        * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
-        * Needed will never be sent, but if a message was sent before
-        * PMTU discovery was disabled that was larger than the PMTU, it
-        * would not be fragmented, so it must be re-transmitted fragmented.
+       /* Update transport's view of the MTU. Return if no update was needed.
+        * If an update wasn't needed/possible, it also doesn't make sense to
+        * try to retransmit now.
         */
+       if (!sctp_transport_update_pmtu(t, pmtu))
+               return;
+
+       /* Update association pmtu. */
+       sctp_assoc_sync_pmtu(asoc);
+
+       /* Retransmit with the new pmtu setting. */
        sctp_retransmit(&asoc->outqueue, t, SCTP_RTXR_PMTUD);
 }
 
index 3253f724a995256084dcb1f6610de3384b475e79..9b01e994f66108a12abc31f452482f1de8749d84 100644 (file)
@@ -2277,7 +2277,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 
                if (asoc && sctp_outq_is_empty(&asoc->outqueue)) {
                        event = sctp_ulpevent_make_sender_dry_event(asoc,
-                                       GFP_ATOMIC);
+                                       GFP_USER | __GFP_NOWARN);
                        if (!event)
                                return -ENOMEM;
 
@@ -3498,6 +3498,8 @@ static int sctp_setsockopt_hmac_ident(struct sock *sk,
 
        if (optlen < sizeof(struct sctp_hmacalgo))
                return -EINVAL;
+       optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) +
+                                            SCTP_AUTH_NUM_HMACS * sizeof(u16));
 
        hmacs = memdup_user(optval, optlen);
        if (IS_ERR(hmacs))
@@ -3536,6 +3538,11 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
        if (optlen <= sizeof(struct sctp_authkey))
                return -EINVAL;
+       /* authkey->sca_keylength is u16, so optlen can't be bigger than
+        * this.
+        */
+       optlen = min_t(unsigned int, optlen, USHRT_MAX +
+                                            sizeof(struct sctp_authkey));
 
        authkey = memdup_user(optval, optlen);
        if (IS_ERR(authkey))
@@ -3893,6 +3900,9 @@ static int sctp_setsockopt_reset_streams(struct sock *sk,
 
        if (optlen < sizeof(*params))
                return -EINVAL;
+       /* srs_number_streams is u16, so optlen can't be bigger than this. */
+       optlen = min_t(unsigned int, optlen, USHRT_MAX +
+                                            sizeof(__u16) * sizeof(*params));
 
        params = memdup_user(optval, optlen);
        if (IS_ERR(params))
@@ -4498,7 +4508,7 @@ static int sctp_init_sock(struct sock *sk)
        SCTP_DBG_OBJCNT_INC(sock);
 
        local_bh_disable();
-       percpu_counter_inc(&sctp_sockets_allocated);
+       sk_sockets_allocated_inc(sk);
        sock_prot_inuse_add(net, sk->sk_prot, 1);
 
        /* Nothing can fail after this block, otherwise
@@ -4542,7 +4552,7 @@ static void sctp_destroy_sock(struct sock *sk)
        }
        sctp_endpoint_free(sp->ep);
        local_bh_disable();
-       percpu_counter_dec(&sctp_sockets_allocated);
+       sk_sockets_allocated_dec(sk);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        local_bh_enable();
 }
@@ -5015,7 +5025,7 @@ static int sctp_getsockopt_autoclose(struct sock *sk, int len, char __user *optv
        len = sizeof(int);
        if (put_user(len, optlen))
                return -EFAULT;
-       if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int)))
+       if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len))
                return -EFAULT;
        return 0;
 }
@@ -5645,6 +5655,9 @@ copy_getaddrs:
                err = -EFAULT;
                goto out;
        }
+       /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too,
+        * but we can't change it anymore.
+        */
        if (put_user(bytes_copied, optlen))
                err = -EFAULT;
 out:
@@ -6081,7 +6094,7 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
                params.assoc_id = 0;
        } else if (len >= sizeof(struct sctp_assoc_value)) {
                len = sizeof(struct sctp_assoc_value);
-               if (copy_from_user(&params, optval, sizeof(params)))
+               if (copy_from_user(&params, optval, len))
                        return -EFAULT;
        } else
                return -EINVAL;
@@ -6251,7 +6264,9 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
 
        if (len < sizeof(struct sctp_authkeyid))
                return -EINVAL;
-       if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid)))
+
+       len = sizeof(struct sctp_authkeyid);
+       if (copy_from_user(&val, optval, len))
                return -EFAULT;
 
        asoc = sctp_id2assoc(sk, val.scact_assoc_id);
@@ -6263,7 +6278,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len,
        else
                val.scact_keynumber = ep->active_key_id;
 
-       len = sizeof(struct sctp_authkeyid);
        if (put_user(len, optlen))
                return -EFAULT;
        if (copy_to_user(optval, &val, len))
@@ -6289,7 +6303,7 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
        if (len < sizeof(struct sctp_authchunks))
                return -EINVAL;
 
-       if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+       if (copy_from_user(&val, optval, sizeof(val)))
                return -EFAULT;
 
        to = p->gauth_chunks;
@@ -6334,7 +6348,7 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
        if (len < sizeof(struct sctp_authchunks))
                return -EINVAL;
 
-       if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks)))
+       if (copy_from_user(&val, optval, sizeof(val)))
                return -EFAULT;
 
        to = p->gauth_chunks;
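
Several of the setsockopt hunks above share one pattern: optlen arrives straight from userspace and feeds memdup_user(), so an absurd value became an absurd kmalloc(). Because each option struct bounds how many bytes are meaningful, clamping before the copy loses nothing. A generic sketch with hypothetical names:

/* Hypothetical helper showing the clamp-before-copy pattern. */
static void *copy_bounded_opt(const void __user *optval,
                              unsigned int optlen, unsigned int max)
{
        optlen = min_t(unsigned int, optlen, max);
        return memdup_user(optval, optlen);     /* ERR_PTR() on failure */
}
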
index 76ea66be0bbee7d3f018676d52c8b95ba06dbcb1..524dfeb94c41ab1ac735746a8acf93af1c96ae48 100644 (file)
@@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
        sctp_stream_outq_migrate(stream, NULL, outcnt);
        sched->sched_all(stream);
 
-       i = sctp_stream_alloc_out(stream, outcnt, gfp);
-       if (i)
-               return i;
+       ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+       if (ret)
+               goto out;
 
        stream->outcnt = outcnt;
        for (i = 0; i < stream->outcnt; i++)
@@ -170,19 +170,17 @@ in:
        if (!incnt)
                goto out;
 
-       i = sctp_stream_alloc_in(stream, incnt, gfp);
-       if (i) {
-               ret = -ENOMEM;
-               goto free;
+       ret = sctp_stream_alloc_in(stream, incnt, gfp);
+       if (ret) {
+               sched->free(stream);
+               kfree(stream->out);
+               stream->out = NULL;
+               stream->outcnt = 0;
+               goto out;
        }
 
        stream->incnt = incnt;
-       goto out;
 
-free:
-       sched->free(stream);
-       kfree(stream->out);
-       stream->out = NULL;
 out:
        return ret;
 }
index 1e5a22430cf56e40a6f323081beb97836b506384..47f82bd794d915188bad037463c2aa14175a55ef 100644 (file)
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
                transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
        struct dst_entry *dst = sctp_transport_dst_check(t);
+       bool change = true;
 
        if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
-               pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-                       __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
-               /* Use default minimum segment size and disable
-                * pmtu discovery on this transport.
-                */
-               t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
-       } else {
-               t->pathmtu = pmtu;
+               pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+                                   __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+               /* Use default minimum segment instead */
+               pmtu = SCTP_DEFAULT_MINSEGMENT;
        }
+       pmtu = SCTP_TRUNC4(pmtu);
 
        if (dst) {
                dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
                dst = sctp_transport_dst_check(t);
        }
 
-       if (!dst)
+       if (!dst) {
                t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+               dst = t->dst;
+       }
+
+       if (dst) {
+               /* Re-fetch, as lower layers may enforce a higher minimum size */
+               pmtu = SCTP_TRUNC4(dst_mtu(dst));
+               change = t->pathmtu != pmtu;
+       }
+       t->pathmtu = pmtu;
+
+       return change;
 }
 
 /* Caches the dst entry and source address for a transport's destination
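
Returning bool is what lets sctp_icmp_frag_needed(), earlier in this section, skip the pointless retransmission when the path MTU did not actually change. The new SCTP_TRUNC4() rounding keeps the stored MTU a multiple of four so chunk padding math stays consistent; its effect is roughly:

/* Rough equivalent of SCTP_TRUNC4(): round down to a multiple of 4. */
#define sctp_trunc4(len)        ((len) & ~(__u32)3)
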
index a71be33f3afeb0aaaef174ee082c4c547aab1e2d..e36ec5dd64c6ff969fc30aae893d1d5ca8c221bf 100644 (file)
@@ -1084,29 +1084,21 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
 void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
                      gfp_t gfp)
 {
-       struct sctp_association *asoc;
-       __u16 needed, freed;
-
-       asoc = ulpq->asoc;
+       struct sctp_association *asoc = ulpq->asoc;
+       __u32 freed = 0;
+       __u16 needed;
 
-       if (chunk) {
-               needed = ntohs(chunk->chunk_hdr->length);
-               needed -= sizeof(struct sctp_data_chunk);
-       } else
-               needed = SCTP_DEFAULT_MAXWINDOW;
-
-       freed = 0;
+       needed = ntohs(chunk->chunk_hdr->length) -
+                sizeof(struct sctp_data_chunk);
 
        if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
                freed = sctp_ulpq_renege_order(ulpq, needed);
-               if (freed < needed) {
+               if (freed < needed)
                        freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
-               }
        }
        /* If able to free enough room, accept this chunk. */
-       if (chunk && (freed >= needed)) {
-               int retval;
-               retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
+       if (freed >= needed) {
+               int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
                /*
                 * Enter partial delivery if chunk has not been
                 * delivered; otherwise, drain the reassembly queue.
index 05f361faec451cdd69168dd5e2cd7c2ca7c8f7fb..6f05d5c4bf30759ea69944bd212891b9ce49469e 100644 (file)
@@ -436,8 +436,10 @@ static int sock_map_fd(struct socket *sock, int flags)
 {
        struct file *newfile;
        int fd = get_unused_fd_flags(flags);
-       if (unlikely(fd < 0))
+       if (unlikely(fd < 0)) {
+               sock_release(sock);
                return fd;
+       }
 
        newfile = sock_alloc_file(sock, flags, NULL);
        if (likely(!IS_ERR(newfile))) {
@@ -2619,6 +2621,15 @@ out_fs:
 
 core_initcall(sock_init);      /* early initcall */
 
+static int __init jit_init(void)
+{
+#ifdef CONFIG_BPF_JIT_ALWAYS_ON
+       bpf_jit_enable = 1;
+#endif
+       return 0;
+}
+pure_initcall(jit_init);
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
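
With CONFIG_BPF_JIT_ALWAYS_ON the interpreter is compiled out, so bpf_jit_enable has to be forced on before anything can load a program. pure_initcall() is the earliest initcall level, so jit_init() runs before the core_initcall() registering sock_init() just above. For reference, the levels as laid out in include/linux/init.h:

/* Initcall levels, earliest first:
 *   pure(0) < core(1) < postcore(2) < arch(3) < subsys(4)
 *   < fs(5) < device(6) < late(7)
 */
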
index c5fda15ba3193f811151043ac3675a2ebfb15c38..1fdab5c4eda8c2a218448abb9bc461cf6474615c 100644 (file)
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
         * allows a thread in BH context to safely check if the process
         * lock is held. In this case, if the lock is held, queue work.
         */
-       if (sock_owned_by_user(strp->sk)) {
+       if (sock_owned_by_user_nocheck(strp->sk)) {
                queue_work(strp_wq, &strp->work);
                return;
        }
index 47ec121574ce4ef95850f688d85b50eff766a710..c8001471da6c3c53be6c63dde1311302b093f415 100644 (file)
@@ -324,6 +324,7 @@ restart:
        if (res) {
                pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
                        name, -res);
+               kfree(b);
                return -EINVAL;
        }
 
@@ -347,8 +348,10 @@ restart:
        if (skb)
                tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
 
-       if (tipc_mon_create(net, bearer_id))
+       if (tipc_mon_create(net, bearer_id)) {
+               bearer_disable(net, b);
                return -ENOMEM;
+       }
 
        pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
                name,
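
Both tipc_enable_bearer() fixes restore the usual kernel rule for constructors: every failure exit undoes exactly what has been set up so far, in reverse order. The canonical shape, sketched with placeholder helpers:

/* Illustrative LIFO-unwind skeleton; all names are placeholders. */
static int bearer_setup(struct bearer **out)
{
        struct bearer *b = kzalloc(sizeof(*b), GFP_KERNEL);
        int err;

        if (!b)
                return -ENOMEM;
        err = enable_media(b);
        if (err)
                goto err_free;          /* nothing else to undo yet */
        err = create_monitor(b);
        if (err)
                goto err_disable;       /* must undo enable_media() too */
        *out = b;
        return 0;

err_disable:
        disable_media(b);
err_free:
        kfree(b);
        return err;
}
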
index 95fec2c057d6ebdb223e19ef83bf9c383cb2156e..5f4ffae807eee899a51ab79c258d3b26a8c863d9 100644 (file)
@@ -109,7 +109,8 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
 static void tipc_group_decr_active(struct tipc_group *grp,
                                   struct tipc_member *m)
 {
-       if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+       if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING ||
+           m->state == MBR_REMITTED)
                grp->active_cnt--;
 }
 
@@ -351,8 +352,7 @@ void tipc_group_update_member(struct tipc_member *m, int len)
        if (m->window >= ADV_IDLE)
                return;
 
-       if (!list_empty(&m->congested))
-               return;
+       list_del_init(&m->congested);
 
        /* Sort member into congested members' list */
        list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
@@ -369,18 +369,20 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
        u16 prev = grp->bc_snd_nxt - 1;
        struct tipc_member *m;
        struct rb_node *n;
+       u16 ackers = 0;
 
        for (n = rb_first(&grp->members); n; n = rb_next(n)) {
                m = container_of(n, struct tipc_member, tree_node);
                if (tipc_group_is_enabled(m)) {
                        tipc_group_update_member(m, len);
                        m->bc_acked = prev;
+                       ackers++;
                }
        }
 
        /* Mark number of acknowledges to expect, if any */
        if (ack)
-               grp->bc_ackers = grp->member_cnt;
+               grp->bc_ackers = ackers;
        grp->bc_snd_nxt++;
 }
 
@@ -561,7 +563,7 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
        int max_active = grp->max_active;
        int reclaim_limit = max_active * 3 / 4;
        int active_cnt = grp->active_cnt;
-       struct tipc_member *m, *rm;
+       struct tipc_member *m, *rm, *pm;
 
        m = tipc_group_find_member(grp, node, port);
        if (!m)
@@ -604,6 +606,17 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
                        pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
                        tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
                }
+               grp->active_cnt--;
+               list_del_init(&m->list);
+               if (list_empty(&grp->pending))
+                       return;
+
+               /* Set oldest pending member to active and advertise */
+               pm = list_first_entry(&grp->pending, struct tipc_member, list);
+               pm->state = MBR_ACTIVE;
+               list_move_tail(&pm->list, &grp->active);
+               grp->active_cnt++;
+               tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
                break;
        case MBR_RECLAIMING:
        case MBR_DISCOVERED:
@@ -648,6 +661,7 @@ static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
        } else if (mtyp == GRP_REMIT_MSG) {
                msg_set_grp_remitted(hdr, m->window);
        }
+       msg_set_dest_droppable(hdr, true);
        __skb_queue_tail(xmitq, skb);
 }
 
@@ -689,15 +703,16 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                        msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                        __skb_queue_tail(inputq, m->event_msg);
                }
-               if (m->window < ADV_IDLE)
-                       tipc_group_update_member(m, 0);
-               else
-                       list_del_init(&m->congested);
+               list_del_init(&m->congested);
+               tipc_group_update_member(m, 0);
                return;
        case GRP_LEAVE_MSG:
                if (!m)
                        return;
                m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+               list_del_init(&m->list);
+               list_del_init(&m->congested);
+               *usr_wakeup = true;
 
                /* Wait until WITHDRAW event is received */
                if (m->state != MBR_LEAVING) {
@@ -709,8 +724,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                ehdr = buf_msg(m->event_msg);
                msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
                __skb_queue_tail(inputq, m->event_msg);
-               *usr_wakeup = true;
-               list_del_init(&m->congested);
                return;
        case GRP_ADV_MSG:
                if (!m)
@@ -741,14 +754,14 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                if (!m || m->state != MBR_RECLAIMING)
                        return;
 
-               list_del_init(&m->list);
-               grp->active_cnt--;
                remitted = msg_grp_remitted(hdr);
 
                /* Messages preceding the REMIT still in receive queue */
                if (m->advertised > remitted) {
                        m->state = MBR_REMITTED;
                        in_flight = m->advertised - remitted;
+                       m->advertised = ADV_IDLE + in_flight;
+                       return;
                }
                /* All messages preceding the REMIT have been read */
                if (m->advertised <= remitted) {
@@ -760,6 +773,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
                        tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
 
                m->advertised = ADV_IDLE + in_flight;
+               grp->active_cnt--;
+               list_del_init(&m->list);
 
                /* Set oldest pending member to active and advertise */
                if (list_empty(&grp->pending))
@@ -849,19 +864,29 @@ void tipc_group_member_evt(struct tipc_group *grp,
                *usr_wakeup = true;
                m->usr_pending = false;
                node_up = tipc_node_is_up(net, node);
-
-               /* Hold back event if more messages might be expected */
-               if (m->state != MBR_LEAVING && node_up) {
-                       m->event_msg = skb;
-                       tipc_group_decr_active(grp, m);
-                       m->state = MBR_LEAVING;
-               } else {
-                       if (node_up)
+               m->event_msg = NULL;
+
+               if (node_up) {
+                       /* Hold back event if a LEAVE msg should be expected */
+                       if (m->state != MBR_LEAVING) {
+                               m->event_msg = skb;
+                               tipc_group_decr_active(grp, m);
+                               m->state = MBR_LEAVING;
+                       } else {
                                msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
-                       else
+                               __skb_queue_tail(inputq, skb);
+                       }
+               } else {
+                       if (m->state != MBR_LEAVING) {
+                               tipc_group_decr_active(grp, m);
+                               m->state = MBR_LEAVING;
                                msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+                       } else {
+                               msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+                       }
                        __skb_queue_tail(inputq, skb);
                }
+               list_del_init(&m->list);
                list_del_init(&m->congested);
        }
        *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
index 8e884ed06d4b13d9751c27a51959b216b7f48b18..32dc33a94bc714f762a066389a4907e558244cd7 100644 (file)
@@ -642,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id)
 {
        struct tipc_net *tn = tipc_net(net);
        struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-       struct tipc_peer *self = get_self(net, bearer_id);
+       struct tipc_peer *self;
        struct tipc_peer *peer, *tmp;
 
+       if (!mon)
+               return;
+
+       self = get_self(net, bearer_id);
        write_lock_bh(&mon->lock);
        tn->monitors[bearer_id] = NULL;
        list_for_each_entry_safe(peer, tmp, &self->list, list) {
index 41127d0b925ea4d515e7c7bbe6739dee99a442f2..3b408448037769d3dba2da38fc00cdaf360e4950 100644 (file)
@@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
        switch (sk->sk_state) {
        case TIPC_ESTABLISHED:
+       case TIPC_CONNECTING:
                if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
                        revents |= POLLOUT;
                /* fall thru' */
        case TIPC_LISTEN:
-       case TIPC_CONNECTING:
                if (!skb_queue_empty(&sk->sk_receive_queue))
                        revents |= POLLIN | POLLRDNORM;
                break;
index d7d6cb00c47bbab3c963f4b83839aadcc94606a1..1d84f91bbfb0c8c9087e309821eb687325733358 100644 (file)
@@ -23,27 +23,14 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
 cfg80211-y += extra-certs.o
 endif
 
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
        @$(kecho) "  GEN     $@"
-       @(set -e; \
-         allf=""; \
-         for f in $^ ; do \
-             # similar to hexdump -v -e '1/1 "0x%.2x," "\n"' \
-             thisf=$$(od -An -v -tx1 < $$f | \
-                          sed -e 's/ /\n/g' | \
-                          sed -e 's/^[0-9a-f]\+$$/\0/;t;d' | \
-                          sed -e 's/^/0x/;s/$$/,/'); \
-             # file should not be empty - maybe command substitution failed? \
-             test ! -z "$$thisf";\
-             allf=$$allf$$thisf;\
-         done; \
-         ( \
-             echo '#include "reg.h"'; \
-             echo 'const u8 shipped_regdb_certs[] = {'; \
-             echo "$$allf"; \
-             echo '};'; \
-             echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
-         ) >> $@)
+       @(echo '#include "reg.h"'; \
+         echo 'const u8 shipped_regdb_certs[] = {'; \
+         cat $^ ; \
+         echo '};'; \
+         echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
+        ) > $@
 
 $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
                      $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
@@ -66,4 +53,6 @@ $(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
              echo "$$allf"; \
              echo '};'; \
              echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);'; \
-         ) >> $@)
+         ) > $@)
+
+clean-files += shipped-certs.c extra-certs.c
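
The rule shrinks because the key material is now stored pre-formatted: each certs/*.hex file, like sforshee.hex added just below, is already a comma-separated C byte list, so the recipe only wraps it in a declaration. The generated shipped-certs.c therefore looks roughly like:

#include "reg.h"
const u8 shipped_regdb_certs[] = {
        /* contents of certs/*.hex concatenated verbatim here */
};
unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);
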
diff --git a/net/wireless/certs/sforshee.hex b/net/wireless/certs/sforshee.hex
new file mode 100644 (file)
index 0000000..14ea666
--- /dev/null
@@ -0,0 +1,86 @@
+/* Seth Forshee's regdb certificate */
+0x30, 0x82, 0x02, 0xa4, 0x30, 0x82, 0x01, 0x8c,
+0x02, 0x09, 0x00, 0xb2, 0x8d, 0xdf, 0x47, 0xae,
+0xf9, 0xce, 0xa7, 0x30, 0x0d, 0x06, 0x09, 0x2a,
+0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x0b,
+0x05, 0x00, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f,
+0x06, 0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73,
+0x66, 0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30,
+0x20, 0x17, 0x0d, 0x31, 0x37, 0x31, 0x30, 0x30,
+0x36, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35, 0x5a,
+0x18, 0x0f, 0x32, 0x31, 0x31, 0x37, 0x30, 0x39,
+0x31, 0x32, 0x31, 0x39, 0x34, 0x30, 0x33, 0x35,
+0x5a, 0x30, 0x13, 0x31, 0x11, 0x30, 0x0f, 0x06,
+0x03, 0x55, 0x04, 0x03, 0x0c, 0x08, 0x73, 0x66,
+0x6f, 0x72, 0x73, 0x68, 0x65, 0x65, 0x30, 0x82,
+0x01, 0x22, 0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86,
+0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05,
+0x00, 0x03, 0x82, 0x01, 0x0f, 0x00, 0x30, 0x82,
+0x01, 0x0a, 0x02, 0x82, 0x01, 0x01, 0x00, 0xb5,
+0x40, 0xe3, 0x9c, 0x28, 0x84, 0x39, 0x03, 0xf2,
+0x39, 0xd7, 0x66, 0x2c, 0x41, 0x38, 0x15, 0xac,
+0x7e, 0xa5, 0x83, 0x71, 0x25, 0x7e, 0x90, 0x7c,
+0x68, 0xdd, 0x6f, 0x3f, 0xd9, 0xd7, 0x59, 0x38,
+0x9f, 0x7c, 0x6a, 0x52, 0xc2, 0x03, 0x2a, 0x2d,
+0x7e, 0x66, 0xf4, 0x1e, 0xb3, 0x12, 0x70, 0x20,
+0x5b, 0xd4, 0x97, 0x32, 0x3d, 0x71, 0x8b, 0x3b,
+0x1b, 0x08, 0x17, 0x14, 0x6b, 0x61, 0xc4, 0x57,
+0x8b, 0x96, 0x16, 0x1c, 0xfd, 0x24, 0xd5, 0x0b,
+0x09, 0xf9, 0x68, 0x11, 0x84, 0xfb, 0xca, 0x51,
+0x0c, 0xd1, 0x45, 0x19, 0xda, 0x10, 0x44, 0x8a,
+0xd9, 0xfe, 0x76, 0xa9, 0xfd, 0x60, 0x2d, 0x18,
+0x0b, 0x28, 0x95, 0xb2, 0x2d, 0xea, 0x88, 0x98,
+0xb8, 0xd1, 0x56, 0x21, 0xf0, 0x53, 0x1f, 0xf1,
+0x02, 0x6f, 0xe9, 0x46, 0x9b, 0x93, 0x5f, 0x28,
+0x90, 0x0f, 0xac, 0x36, 0xfa, 0x68, 0x23, 0x71,
+0x57, 0x56, 0xf6, 0xcc, 0xd3, 0xdf, 0x7d, 0x2a,
+0xd9, 0x1b, 0x73, 0x45, 0xeb, 0xba, 0x27, 0x85,
+0xef, 0x7a, 0x7f, 0xa5, 0xcb, 0x80, 0xc7, 0x30,
+0x36, 0xd2, 0x53, 0xee, 0xec, 0xac, 0x1e, 0xe7,
+0x31, 0xf1, 0x36, 0xa2, 0x9c, 0x63, 0xc6, 0x65,
+0x5b, 0x7f, 0x25, 0x75, 0x68, 0xa1, 0xea, 0xd3,
+0x7e, 0x00, 0x5c, 0x9a, 0x5e, 0xd8, 0x20, 0x18,
+0x32, 0x77, 0x07, 0x29, 0x12, 0x66, 0x1e, 0x36,
+0x73, 0xe7, 0x97, 0x04, 0x41, 0x37, 0xb1, 0xb1,
+0x72, 0x2b, 0xf4, 0xa1, 0x29, 0x20, 0x7c, 0x96,
+0x79, 0x0b, 0x2b, 0xd0, 0xd8, 0xde, 0xc8, 0x6c,
+0x3f, 0x93, 0xfb, 0xc5, 0xee, 0x78, 0x52, 0x11,
+0x15, 0x1b, 0x7a, 0xf6, 0xe2, 0x68, 0x99, 0xe7,
+0xfb, 0x46, 0x16, 0x84, 0xe3, 0xc7, 0xa1, 0xe6,
+0xe0, 0xd2, 0x46, 0xd5, 0xe1, 0xc4, 0x5f, 0xa0,
+0x66, 0xf4, 0xda, 0xc4, 0xff, 0x95, 0x1d, 0x02,
+0x03, 0x01, 0x00, 0x01, 0x30, 0x0d, 0x06, 0x09,
+0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01, 0x01,
+0x0b, 0x05, 0x00, 0x03, 0x82, 0x01, 0x01, 0x00,
+0x87, 0x03, 0xda, 0xf2, 0x82, 0xc2, 0xdd, 0xaf,
+0x7c, 0x44, 0x2f, 0x86, 0xd3, 0x5f, 0x4c, 0x93,
+0x48, 0xb9, 0xfe, 0x07, 0x17, 0xbb, 0x21, 0xf7,
+0x25, 0x23, 0x4e, 0xaa, 0x22, 0x0c, 0x16, 0xb9,
+0x73, 0xae, 0x9d, 0x46, 0x7c, 0x75, 0xd9, 0xc3,
+0x49, 0x57, 0x47, 0xbf, 0x33, 0xb7, 0x97, 0xec,
+0xf5, 0x40, 0x75, 0xc0, 0x46, 0x22, 0xf0, 0xa0,
+0x5d, 0x9c, 0x79, 0x13, 0xa1, 0xff, 0xb8, 0xa3,
+0x2f, 0x7b, 0x8e, 0x06, 0x3f, 0xc8, 0xb6, 0xe4,
+0x6a, 0x28, 0xf2, 0x34, 0x5c, 0x23, 0x3f, 0x32,
+0xc0, 0xe6, 0xad, 0x0f, 0xac, 0xcf, 0x55, 0x74,
+0x47, 0x73, 0xd3, 0x01, 0x85, 0xb7, 0x0b, 0x22,
+0x56, 0x24, 0x7d, 0x9f, 0x09, 0xa9, 0x0e, 0x86,
+0x9e, 0x37, 0x5b, 0x9c, 0x6d, 0x02, 0xd9, 0x8c,
+0xc8, 0x50, 0x6a, 0xe2, 0x59, 0xf3, 0x16, 0x06,
+0xea, 0xb2, 0x42, 0xb5, 0x58, 0xfe, 0xba, 0xd1,
+0x81, 0x57, 0x1a, 0xef, 0xb2, 0x38, 0x88, 0x58,
+0xf6, 0xaa, 0xc4, 0x2e, 0x8b, 0x5a, 0x27, 0xe4,
+0xa5, 0xe8, 0xa4, 0xca, 0x67, 0x5c, 0xac, 0x72,
+0x67, 0xc3, 0x6f, 0x13, 0xc3, 0x2d, 0x35, 0x79,
+0xd7, 0x8a, 0xe7, 0xf5, 0xd4, 0x21, 0x30, 0x4a,
+0xd5, 0xf6, 0xa3, 0xd9, 0x79, 0x56, 0xf2, 0x0f,
+0x10, 0xf7, 0x7d, 0xd0, 0x51, 0x93, 0x2f, 0x47,
+0xf8, 0x7d, 0x4b, 0x0a, 0x84, 0x55, 0x12, 0x0a,
+0x7d, 0x4e, 0x3b, 0x1f, 0x2b, 0x2f, 0xfc, 0x28,
+0xb3, 0x69, 0x34, 0xe1, 0x80, 0x80, 0xbb, 0xe2,
+0xaf, 0xb9, 0xd6, 0x30, 0xf1, 0x1d, 0x54, 0x87,
+0x23, 0x99, 0x9f, 0x51, 0x03, 0x4c, 0x45, 0x7d,
+0x02, 0x65, 0x73, 0xab, 0xfd, 0xcf, 0x94, 0xcc,
+0x0d, 0x3a, 0x60, 0xfd, 0x3c, 0x14, 0x2f, 0x16,
+0x33, 0xa9, 0x21, 0x1f, 0xcb, 0x50, 0xb1, 0x8f,
+0x03, 0xee, 0xa0, 0x66, 0xa9, 0x16, 0x79, 0x14,
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
deleted file mode 100644 (file)
index c6f8f9d..0000000
Binary files a/net/wireless/certs/sforshee.x509 and /dev/null differ
index b1ac23ca20c86be0af71e9a1ba92cc99d8d5a967..2b3dbcd40e46390debf84f4a127b23f185e76b39 100644 (file)
@@ -2610,7 +2610,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        case NL80211_IFTYPE_AP:
                if (wdev->ssid_len &&
                    nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
        case NL80211_IFTYPE_STATION:
        case NL80211_IFTYPE_P2P_CLIENT:
@@ -2623,7 +2623,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
                if (!ssid_ie)
                        break;
                if (nla_put(msg, NL80211_ATTR_SSID, ssid_ie[1], ssid_ie + 2))
-                       goto nla_put_failure;
+                       goto nla_put_failure_locked;
                break;
                }
        default:
@@ -2635,6 +2635,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
        genlmsg_end(msg, hdr);
        return 0;
 
+ nla_put_failure_locked:
+       wdev_unlock(wdev);
  nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
@@ -11359,7 +11361,8 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
                break;
        case NL80211_NAN_FUNC_FOLLOW_UP:
                if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
-                   !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+                   !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
+                   !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
                        err = -EINVAL;
                        goto out;
                }
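
The bug fixed here is a goto to an error label that skipped the wdev_unlock() matching an earlier wdev_lock(). Stacking a second label above the generic one keeps a single exit path while still dropping the lock; the shape is:

        wdev_lock(wdev);
        if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
                goto nla_put_failure_locked;
        wdev_unlock(wdev);
        /* ... */

 nla_put_failure_locked:
        wdev_unlock(wdev);              /* falls through, now unlocked */
 nla_put_failure:
        genlmsg_cancel(msg, hdr);
        return -EMSGSIZE;
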
index 347ab31574d509ac9edd448bcbb7501a05f5ac2d..3f6f6f8c9fa5224e75c1b62f299f217da172d370 100644 (file)
@@ -8,15 +8,29 @@
  *
  */
 
+#include <linux/bottom_half.h>
+#include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/percpu.h>
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/ip_tunnels.h>
 #include <net/ip6_tunnel.h>
 
+struct xfrm_trans_tasklet {
+       struct tasklet_struct tasklet;
+       struct sk_buff_head queue;
+};
+
+struct xfrm_trans_cb {
+       int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
+};
+
+#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
+
 static struct kmem_cache *secpath_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@ -25,6 +39,8 @@ static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1];
 static struct gro_cells gro_cells;
 static struct net_device xfrm_napi_dev;
 
+static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
+
 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
 {
        int err = 0;
@@ -207,7 +223,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
        xfrm_address_t *daddr;
        struct xfrm_mode *inner_mode;
        u32 mark = skb->mark;
-       unsigned int family;
+       unsigned int family = AF_UNSPEC;
        int decaps = 0;
        int async = 0;
        bool xfrm_gro = false;
@@ -216,6 +232,16 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        if (encap_type < 0) {
                x = xfrm_input_state(skb);
+
+               if (unlikely(x->km.state != XFRM_STATE_VALID)) {
+                       if (x->km.state == XFRM_STATE_ACQ)
+                               XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
+                       else
+                               XFRM_INC_STATS(net,
+                                              LINUX_MIB_XFRMINSTATEINVALID);
+                       goto drop;
+               }
+
                family = x->outer_mode->afinfo->family;
 
                /* An encap_type of -1 indicates async resumption. */
@@ -467,9 +493,41 @@ int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
+static void xfrm_trans_reinject(unsigned long data)
+{
+       struct xfrm_trans_tasklet *trans = (void *)data;
+       struct sk_buff_head queue;
+       struct sk_buff *skb;
+
+       __skb_queue_head_init(&queue);
+       skb_queue_splice_init(&trans->queue, &queue);
+
+       while ((skb = __skb_dequeue(&queue)))
+               XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
+}
+
+int xfrm_trans_queue(struct sk_buff *skb,
+                    int (*finish)(struct net *, struct sock *,
+                                  struct sk_buff *))
+{
+       struct xfrm_trans_tasklet *trans;
+
+       trans = this_cpu_ptr(&xfrm_trans_tasklet);
+
+       if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+               return -ENOBUFS;
+
+       XFRM_TRANS_SKB_CB(skb)->finish = finish;
+       skb_queue_tail(&trans->queue, skb);
+       tasklet_schedule(&trans->tasklet);
+       return 0;
+}
+EXPORT_SYMBOL(xfrm_trans_queue);
+
 void __init xfrm_input_init(void)
 {
        int err;
+       int i;
 
        init_dummy_netdev(&xfrm_napi_dev);
        err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@ -480,4 +538,13 @@ void __init xfrm_input_init(void)
                                           sizeof(struct sec_path),
                                           0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                           NULL);
+
+       for_each_possible_cpu(i) {
+               struct xfrm_trans_tasklet *trans;
+
+               trans = &per_cpu(xfrm_trans_tasklet, i);
+               __skb_queue_head_init(&trans->queue);
+               tasklet_init(&trans->tasklet, xfrm_trans_reinject,
+                            (unsigned long)trans);
+       }
 }
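
This is the infrastructure behind the xfrm6_transport_finish2() hunk at the top of this section: each CPU gets a tasklet plus an skb queue bounded by netdev_max_backlog, so the finish callback runs later in softirq context instead of recursing inside the decapsulation path. On queue failure the skb still belongs to the caller, which is why that hunk frees it:

        if (xfrm_trans_queue(skb, ip6_rcv_finish))
                __kfree_skb(skb);       /* -ENOBUFS: queue full, drop it */
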
index 9542975eb2f90dcb2bae894edeb9b418d04f252e..70aa5cb0c659d54eacb85f92dc95b2db17bfe1d0 100644 (file)
@@ -1168,9 +1168,15 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
  again:
        pol = rcu_dereference(sk->sk_policy[dir]);
        if (pol != NULL) {
-               bool match = xfrm_selector_match(&pol->selector, fl, family);
+               bool match;
                int err = 0;
 
+               if (pol->family != family) {
+                       pol = NULL;
+                       goto out;
+               }
+
+               match = xfrm_selector_match(&pol->selector, fl, family);
                if (match) {
                        if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
                                pol = NULL;
@@ -1833,6 +1839,7 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
                   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
            xfrm_xdst_can_reuse(xdst, xfrm, err)) {
                dst_hold(&xdst->u.dst);
+               xfrm_pols_put(pols, num_pols);
                while (err > 0)
                        xfrm_state_put(xfrm[--err]);
                return xdst;
index 065d89606888ec1bf053577d3949746bcea6f099..500b3391f474b96fe273060ff8eae16f1e23f3c2 100644 (file)
@@ -1343,6 +1343,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 
        if (orig->aead) {
                x->aead = xfrm_algo_aead_clone(orig->aead);
+               x->geniv = orig->geniv;
                if (!x->aead)
                        goto error;
        }
index 983b0233767bec16ba55b763ef82ce781ca5046a..bdb48e5dba0480aa4c3c6855be42cfb93f3bf335 100644 (file)
@@ -1419,11 +1419,14 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 
 static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 {
+       u16 prev_family;
        int i;
 
        if (nr > XFRM_MAX_DEPTH)
                return -EINVAL;
 
+       prev_family = family;
+
        for (i = 0; i < nr; i++) {
                /* We never validated the ut->family value, so many
                 * applications simply leave it at zero.  The check was
@@ -1435,6 +1438,12 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
                if (!ut[i].family)
                        ut[i].family = family;
 
+               if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
+                   (ut[i].family != prev_family))
+                       return -EINVAL;
+
+               prev_family = ut[i].family;
+
                switch (ut[i].family) {
                case AF_INET:
                        break;
@@ -1445,6 +1454,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
                default:
                        return -EINVAL;
                }
+
+               switch (ut[i].id.proto) {
+               case IPPROTO_AH:
+               case IPPROTO_ESP:
+               case IPPROTO_COMP:
+#if IS_ENABLED(CONFIG_IPV6)
+               case IPPROTO_ROUTING:
+               case IPPROTO_DSTOPTS:
+#endif
+               case IPSEC_PROTO_ANY:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+
        }
 
        return 0;
@@ -2470,7 +2494,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_PROTO]           = { .type = NLA_U8 },
        [XFRMA_ADDRESS_FILTER]  = { .len = sizeof(struct xfrm_address_filter) },
        [XFRMA_OFFLOAD_DEV]     = { .len = sizeof(struct xfrm_user_offload) },
-       [XFRMA_OUTPUT_MARK]     = { .len = NLA_U32 },
+       [XFRMA_OUTPUT_MARK]     = { .type = NLA_U32 },
 };
 
 static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
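
The one-character policy fix matters because .len and .type are different fields: NLA_U32 is merely the enum constant 3, so ".len = NLA_U32" declared an NLA_UNSPEC attribute with a minimum length of 3 bytes, while ".type = NLA_U32" enforces a payload of exactly sizeof(u32). Side by side:

/* What the two easily-confused initializers actually mean:
 *   [ATTR] = { .type = NLA_U32 }  -> payload must be exactly 4 bytes
 *   [ATTR] = { .len  = NLA_U32 }  -> type NLA_UNSPEC, minimum length 3,
 *                                    since NLA_U32 == 3 as an integer
 */
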
index 86dc07a01b4398a3e72e79584a2c447bb5643780..e7836b47f060889a036405e7cf70d04c4177adf9 100644 (file)
@@ -1,4 +1,3 @@
-*.hash.c
 *.lex.c
 *.tab.c
 *.tab.h
index cbf4996dd9c1045ecc7ffa66a9ec79edd99b1038..8cee597d33a594d3d7380390aa2b697218034578 100644 (file)
@@ -893,7 +893,10 @@ static enum string_value_kind expr_parse_string(const char *str,
        switch (type) {
        case S_BOOLEAN:
        case S_TRISTATE:
-               return k_string;
+               val->s = !strcmp(str, "n") ? 0 :
+                        !strcmp(str, "m") ? 1 :
+                        !strcmp(str, "y") ? 2 : -1;
+               return k_signed;
        case S_INT:
                val->s = strtoll(str, &tail, 10);
                kind = k_signed;
index e8e449444e658be4a9190c6ea2de14cca8fc4890..b0cb9a5f94480d2c5aaf290b31fd15e6c5930ec2 100644 (file)
@@ -54,6 +54,17 @@ config SECURITY_NETWORK
          implement socket and networking access controls.
          If you are unsure how to answer this question, answer N.
 
+config PAGE_TABLE_ISOLATION
+       bool "Remove the kernel mapping in user mode"
+       default y
+       depends on X86_64 && !UML
+       help
+         This feature reduces the number of hardware side channels by
+         ensuring that the majority of kernel addresses are not mapped
+         into userspace.
+
+         See Documentation/x86/pti.txt for more details.
+
 config SECURITY_INFINIBAND
        bool "Infiniband Security Hooks"
        depends on SECURITY && INFINIBAND
index 04ba9d0718ea590b7c5033cfc4b952c701acb54d..6a54d2ffa84012fc7fd27aef18c7fa0410e050e3 100644 (file)
@@ -330,10 +330,7 @@ static struct aa_profile *__attach_match(const char *name,
                        continue;
 
                if (profile->xmatch) {
-                       if (profile->xmatch_len == len) {
-                               conflict = true;
-                               continue;
-                       } else if (profile->xmatch_len > len) {
+                       if (profile->xmatch_len >= len) {
                                unsigned int state;
                                u32 perm;
 
@@ -342,6 +339,10 @@ static struct aa_profile *__attach_match(const char *name,
                                perm = dfa_user_allow(profile->xmatch, state);
                                /* any accepting state means a valid match. */
                                if (perm & MAY_EXEC) {
+                                       if (profile->xmatch_len == len) {
+                                               conflict = true;
+                                               continue;
+                                       }
                                        candidate = profile;
                                        len = profile->xmatch_len;
                                        conflict = false;
index 2b27bb79aec4421665aaa6f2a2490868227bed0f..d7b7e711516010806d194ed39b2a24e1a9f9fd09 100644 (file)
@@ -133,6 +133,9 @@ extern struct aa_perms allperms;
 #define xcheck_labels_profiles(L1, L2, FN, args...)            \
        xcheck_ns_labels((L1), (L2), xcheck_ns_profile_label, (FN), args)
 
+#define xcheck_labels(L1, L2, P, FN1, FN2)                     \
+       xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
+
 
 void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask);
 void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask);
index 7ca0032e7ba96ef374aefe98d47acb919f3f81d3..b40678f3c1d5a4d62eeb1255f26e2a73aabd6767 100644 (file)
@@ -64,40 +64,48 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
                        FLAGS_NONE, GFP_ATOMIC);
 }
 
+/* assumes check for PROFILE_MEDIATES is already done */
 /* TODO: conditionals */
 static int profile_ptrace_perm(struct aa_profile *profile,
-                              struct aa_profile *peer, u32 request,
-                              struct common_audit_data *sa)
+                            struct aa_label *peer, u32 request,
+                            struct common_audit_data *sa)
 {
        struct aa_perms perms = { };
 
-       /* need because of peer in cross check */
-       if (profile_unconfined(profile) ||
-           !PROFILE_MEDIATES(profile, AA_CLASS_PTRACE))
-               return 0;
-
-       aad(sa)->peer = &peer->label;
-       aa_profile_match_label(profile, &peer->label, AA_CLASS_PTRACE, request,
+       aad(sa)->peer = peer;
+       aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
                               &perms);
        aa_apply_modes_to_perms(profile, &perms);
        return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
 
-static int cross_ptrace_perm(struct aa_profile *tracer,
-                            struct aa_profile *tracee, u32 request,
-                            struct common_audit_data *sa)
+static int profile_tracee_perm(struct aa_profile *tracee,
+                              struct aa_label *tracer, u32 request,
+                              struct common_audit_data *sa)
 {
+       if (profile_unconfined(tracee) || unconfined(tracer) ||
+           !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+               return 0;
+
+       return profile_ptrace_perm(tracee, tracer, request, sa);
+}
+
+static int profile_tracer_perm(struct aa_profile *tracer,
+                              struct aa_label *tracee, u32 request,
+                              struct common_audit_data *sa)
+{
+       if (profile_unconfined(tracer))
+               return 0;
+
        if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
-               return xcheck(profile_ptrace_perm(tracer, tracee, request, sa),
-                             profile_ptrace_perm(tracee, tracer,
-                                                 request << PTRACE_PERM_SHIFT,
-                                                 sa));
-       /* policy uses the old style capability check for ptrace */
-       if (profile_unconfined(tracer) || tracer == tracee)
+               return profile_ptrace_perm(tracer, tracee, request, sa);
+
+       /* profile uses the old style capability check for ptrace */
+       if (&tracer->label == tracee)
                return 0;
 
        aad(sa)->label = &tracer->label;
-       aad(sa)->peer = &tracee->label;
+       aad(sa)->peer = tracee;
        aad(sa)->request = 0;
        aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
 
@@ -115,10 +123,13 @@ static int cross_ptrace_perm(struct aa_profile *tracer,
 int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
                  u32 request)
 {
+       struct aa_profile *profile;
+       u32 xrequest = request << PTRACE_PERM_SHIFT;
        DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
 
-       return xcheck_labels_profiles(tracer, tracee, cross_ptrace_perm,
-                                     request, &sa);
+       return xcheck_labels(tracer, tracee, profile,
+                       profile_tracer_perm(profile, tracee, request, &sa),
+                       profile_tracee_perm(profile, tracer, xrequest, &sa));
 }
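
The rewrite splits the old cross check into per-direction helpers so each side can bail out early for unconfined labels before any DFA walk, and shifts the tracee's permission bits once at the top. xcheck_labels() still evaluates both directions and fails if either side does; a rough sketch of the underlying xcheck() (the in-tree macro differs in detail):

#define xcheck(e1, e2)                          \
({                                              \
        int __a = (e1);                         \
        int __b = (e2);                         \
        __b ? __b : __a;                        \
})
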
 
 
index ed9b4d0f9f7e212b161c1a312c52b56f5e0b0b49..8c558cbce930b15a69a7451973a974152fbb6a08 100644 (file)
@@ -329,6 +329,9 @@ static int match_mnt_path_str(struct aa_profile *profile,
        AA_BUG(!mntpath);
        AA_BUG(!buffer);
 
+       if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+               return 0;
+
        error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
                             &mntpnt, &info, profile->disconnected);
        if (error)
@@ -380,6 +383,9 @@ static int match_mnt(struct aa_profile *profile, const struct path *path,
        AA_BUG(!profile);
        AA_BUG(devpath && !devbuffer);
 
+       if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+               return 0;
+
        if (devpath) {
                error = aa_path_name(devpath, path_flags(profile, devpath),
                                     devbuffer, &devname, &info,
@@ -558,6 +564,9 @@ static int profile_umount(struct aa_profile *profile, struct path *path,
        AA_BUG(!profile);
        AA_BUG(!path);
 
+       if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+               return 0;
+
        error = aa_path_name(path, path_flags(profile, path), buffer, &name,
                             &info, profile->disconnected);
        if (error)
@@ -613,7 +622,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
        AA_BUG(!new_path);
        AA_BUG(!old_path);
 
-       if (profile_unconfined(profile))
+       if (profile_unconfined(profile) ||
+           !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
                return aa_get_newest_label(&profile->label);
 
        error = aa_path_name(old_path, path_flags(profile, old_path),
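
Each mount-mediation entry point above now returns success early when the loaded profile carries no rules for the mount class, so policy compiled before mount mediation existed keeps behaving as it always did. A rough sketch of the guard, assuming PROFILE_MEDIATES reduces to a per-class start-state lookup (the class number and table size are illustrative):

/* A profile "mediates" a class when its policy dfa has a start state
 * for that class; values here are illustrative. */
#define AA_CLASS_MOUNT 7

struct policy { unsigned int start[16]; };

static int profile_mediates(const struct policy *p, unsigned int class)
{
	return p->start[class] != 0;
}

int match_mnt_sketch(const struct policy *p)
{
	if (!profile_mediates(p, AA_CLASS_MOUNT))
		return 0;	/* old policy: no mount rules, so permit */
	/* ... path lookup and dfa matching would follow here ... */
	return 0;
}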
index 4f8e0934095679f71e9674d4a567c6c1fe492f9e..48620c93d6976eca3a9b1cc3c34cfe21a69c7a39 100644 (file)
@@ -348,21 +348,18 @@ static __u32 sansflags(__u32 m)
        return m & ~VFS_CAP_FLAGS_EFFECTIVE;
 }
 
-static bool is_v2header(size_t size, __le32 magic)
+static bool is_v2header(size_t size, const struct vfs_cap_data *cap)
 {
-       __u32 m = le32_to_cpu(magic);
        if (size != XATTR_CAPS_SZ_2)
                return false;
-       return sansflags(m) == VFS_CAP_REVISION_2;
+       return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
 }
 
-static bool is_v3header(size_t size, __le32 magic)
+static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
 {
-       __u32 m = le32_to_cpu(magic);
-
        if (size != XATTR_CAPS_SZ_3)
                return false;
-       return sansflags(m) == VFS_CAP_REVISION_3;
+       return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
 }
 
 /*
@@ -405,7 +402,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 
        fs_ns = inode->i_sb->s_user_ns;
        cap = (struct vfs_cap_data *) tmpbuf;
-       if (is_v2header((size_t) ret, cap->magic_etc)) {
+       if (is_v2header((size_t) ret, cap)) {
                /* If this is sizeof(vfs_cap_data) then we're ok with the
                 * on-disk value, so return that.  */
                if (alloc)
@@ -413,7 +410,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
                else
                        kfree(tmpbuf);
                return ret;
-       } else if (!is_v3header((size_t) ret, cap->magic_etc)) {
+       } else if (!is_v3header((size_t) ret, cap)) {
                kfree(tmpbuf);
                return -EINVAL;
        }
@@ -470,9 +467,9 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
        return make_kuid(task_ns, rootid);
 }
 
-static bool validheader(size_t size, __le32 magic)
+static bool validheader(size_t size, const struct vfs_cap_data *cap)
 {
-       return is_v2header(size, magic) || is_v3header(size, magic);
+       return is_v2header(size, cap) || is_v3header(size, cap);
 }
 
 /*
@@ -495,7 +492,7 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
 
        if (!*ivalue)
                return -EINVAL;
-       if (!validheader(size, cap->magic_etc))
+       if (!validheader(size, cap))
                return -EINVAL;
        if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
                return -EPERM;
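
Handing the validators the whole struct vfs_cap_data instead of a pre-extracted __le32 keeps the size check and the magic check against the same object in one place. A standalone approximation of the v2/v3 header tests; the constants mirror linux/capability.h and the le32_to_cpu conversion of magic_etc is omitted for brevity:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Constants mirror linux/capability.h; le32_to_cpu is omitted here. */
#define VFS_CAP_FLAGS_EFFECTIVE	0x000001
#define VFS_CAP_REVISION_2	0x02000000
#define VFS_CAP_REVISION_3	0x03000000
#define XATTR_CAPS_SZ_2		20
#define XATTR_CAPS_SZ_3		24

struct cap_hdr { uint32_t magic_etc; /* payload omitted */ };

static uint32_t sansflags(uint32_t m)
{
	return m & ~VFS_CAP_FLAGS_EFFECTIVE;
}

/* Size and magic are always validated together against one revision. */
static bool is_v2header(size_t size, const struct cap_hdr *cap)
{
	return size == XATTR_CAPS_SZ_2 &&
	       sansflags(cap->magic_etc) == VFS_CAP_REVISION_2;
}

static bool is_v3header(size_t size, const struct cap_hdr *cap)
{
	return size == XATTR_CAPS_SZ_3 &&
	       sansflags(cap->magic_etc) == VFS_CAP_REVISION_3;
}

static bool validheader(size_t size, const struct cap_hdr *cap)
{
	return is_v2header(size, cap) || is_v3header(size, cap);
}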
index e49f448ee04f4af00a7c7c00755f8a29b226bb6f..c2db7e905f7d69b4f9b153c8a72b0f463c3d4287 100644 (file)
@@ -455,7 +455,6 @@ static int snd_pcm_hw_param_near(struct snd_pcm_substream *pcm,
                v = snd_pcm_hw_param_last(pcm, params, var, dir);
        else
                v = snd_pcm_hw_param_first(pcm, params, var, dir);
-       snd_BUG_ON(v < 0);
        return v;
 }
 
@@ -1335,8 +1334,11 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
 
        if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
                return tmp;
-       mutex_lock(&runtime->oss.params_lock);
        while (bytes > 0) {
+               if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+                       tmp = -ERESTARTSYS;
+                       break;
+               }
                if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
                        tmp = bytes;
                        if (tmp + runtime->oss.buffer_used > runtime->oss.period_bytes)
@@ -1380,14 +1382,18 @@ static ssize_t snd_pcm_oss_write1(struct snd_pcm_substream *substream, const cha
                        xfer += tmp;
                        if ((substream->f_flags & O_NONBLOCK) != 0 &&
                            tmp != runtime->oss.period_bytes)
-                               break;
+                               tmp = -EAGAIN;
                }
-       }
-       mutex_unlock(&runtime->oss.params_lock);
-       return xfer;
-
  err:
-       mutex_unlock(&runtime->oss.params_lock);
+               mutex_unlock(&runtime->oss.params_lock);
+               if (tmp < 0)
+                       break;
+               if (signal_pending(current)) {
+                       tmp = -ERESTARTSYS;
+                       break;
+               }
+               tmp = 0;
+       }
        return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
 
@@ -1435,8 +1441,11 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
 
        if ((tmp = snd_pcm_oss_make_ready(substream)) < 0)
                return tmp;
-       mutex_lock(&runtime->oss.params_lock);
        while (bytes > 0) {
+               if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
+                       tmp = -ERESTARTSYS;
+                       break;
+               }
                if (bytes < runtime->oss.period_bytes || runtime->oss.buffer_used > 0) {
                        if (runtime->oss.buffer_used == 0) {
                                tmp = snd_pcm_oss_read2(substream, runtime->oss.buffer, runtime->oss.period_bytes, 1);
@@ -1467,12 +1476,16 @@ static ssize_t snd_pcm_oss_read1(struct snd_pcm_substream *substream, char __use
                        bytes -= tmp;
                        xfer += tmp;
                }
-       }
-       mutex_unlock(&runtime->oss.params_lock);
-       return xfer;
-
  err:
-       mutex_unlock(&runtime->oss.params_lock);
+               mutex_unlock(&runtime->oss.params_lock);
+               if (tmp < 0)
+                       break;
+               if (signal_pending(current)) {
+                       tmp = -ERESTARTSYS;
+                       break;
+               }
+               tmp = 0;
+       }
        return xfer > 0 ? (snd_pcm_sframes_t)xfer : tmp;
 }
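
Both OSS copy loops now take oss.params_lock once per iteration via mutex_lock_interruptible() and re-test signal_pending() after dropping it, so a task copying a large buffer stays killable and never holds the lock across the whole transfer. The locking skeleton, with the actual period copy reduced to a stand-in (kernel-style sketch):

#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>

/* Locking skeleton of the loops above; the period copy is simulated. */
static ssize_t copy_loop(struct mutex *params_lock, size_t bytes,
			 size_t period_bytes)
{
	ssize_t xfer = 0;
	ssize_t tmp = 0;

	while (bytes > 0) {
		if (mutex_lock_interruptible(params_lock)) {
			tmp = -ERESTARTSYS;	/* interrupted while waiting */
			break;
		}
		tmp = min(bytes, period_bytes);	/* stand-in for the copy */
		mutex_unlock(params_lock);
		if (tmp < 0)			/* copy failed under the lock */
			break;
		xfer += tmp;
		bytes -= tmp;
		tmp = 0;
		if (signal_pending(current)) {	/* stay killable between chunks */
			tmp = -ERESTARTSYS;
			break;
		}
	}
	return xfer > 0 ? xfer : tmp;
}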
 
index cadc937928683cb3abcf7ec8208e8159df31c00f..85a56af104bd619160ace33f51ad2786eca3279a 100644 (file)
@@ -592,18 +592,26 @@ snd_pcm_sframes_t snd_pcm_plug_write_transfer(struct snd_pcm_substream *plug, st
        snd_pcm_sframes_t frames = size;
 
        plugin = snd_pcm_plug_first(plug);
-       while (plugin && frames > 0) {
+       while (plugin) {
+               if (frames <= 0)
+                       return frames;
                if ((next = plugin->next) != NULL) {
                        snd_pcm_sframes_t frames1 = frames;
-                       if (plugin->dst_frames)
+                       if (plugin->dst_frames) {
                                frames1 = plugin->dst_frames(plugin, frames);
+                               if (frames1 <= 0)
+                                       return frames1;
+                       }
                        if ((err = next->client_channels(next, frames1, &dst_channels)) < 0) {
                                return err;
                        }
                        if (err != frames1) {
                                frames = err;
-                               if (plugin->src_frames)
+                               if (plugin->src_frames) {
                                        frames = plugin->src_frames(plugin, frames1);
+                                       if (frames <= 0)
+                                               return frames;
+                               }
                        }
                } else
                        dst_channels = NULL;
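
The rewritten transfer loop treats every frame count returned by a plugin's dst_frames()/src_frames() callback as untrusted and stops on any non-positive value instead of letting it propagate down the chain. The same checked-chain idea in a standalone miniature:

typedef long sframes_t;			/* stand-in for snd_pcm_sframes_t */
typedef sframes_t (*conv_fn)(sframes_t frames);

/* Walk a conversion chain; refuse to pass on a non-positive count. */
static sframes_t chain_convert(const conv_fn *chain, int n, sframes_t frames)
{
	for (int i = 0; i < n; i++) {
		if (frames <= 0)	/* error or nothing left: stop here */
			return frames;
		frames = chain[i](frames);
	}
	return frames;
}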
index 10e7ef7a8804b1ba0d88b6a0c4ad478f38a9397e..db7894bb028ccc6ca941c45c4a7e37602ec1228d 100644 (file)
@@ -1632,7 +1632,7 @@ int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm,
                return changed;
        if (params->rmask) {
                int err = snd_pcm_hw_refine(pcm, params);
-               if (snd_BUG_ON(err < 0))
+               if (err < 0)
                        return err;
        }
        return snd_pcm_hw_param_value(params, var, dir);
@@ -1678,7 +1678,7 @@ int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm,
                return changed;
        if (params->rmask) {
                int err = snd_pcm_hw_refine(pcm, params);
-               if (snd_BUG_ON(err < 0))
+               if (err < 0)
                        return err;
        }
        return snd_pcm_hw_param_value(params, var, dir);
index a4d92e46c459b014287b729f9fe6b9d9e5c19ebf..f08772568c1709ab560d7ed6db1bf418b08c984c 100644 (file)
@@ -2580,7 +2580,7 @@ static snd_pcm_sframes_t forward_appl_ptr(struct snd_pcm_substream *substream,
        return ret < 0 ? ret : frames;
 }
 
-/* decrease the appl_ptr; returns the processed frames or a negative error */
+/* decrease the appl_ptr; returns the processed frames or zero for error */
 static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
                                         snd_pcm_uframes_t frames,
                                         snd_pcm_sframes_t avail)
@@ -2597,7 +2597,12 @@ static snd_pcm_sframes_t rewind_appl_ptr(struct snd_pcm_substream *substream,
        if (appl_ptr < 0)
                appl_ptr += runtime->boundary;
        ret = pcm_lib_apply_appl_ptr(substream, appl_ptr);
-       return ret < 0 ? ret : frames;
+       /* NOTE: we return zero for errors because PulseAudio gets depressed
+        * upon receiving an error from the rewind ioctl and stops processing
+        * altogether.  Returning zero means that no rewind is done, so
+        * it's not absolutely wrong to answer like that.
+        */
+       return ret < 0 ? 0 : frames;
 }
 
 static snd_pcm_sframes_t snd_pcm_playback_rewind(struct snd_pcm_substream *substream,
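
Rewind failures are now mapped to "zero frames rewound" rather than a negative errno, purely as a userspace-compatibility measure; the forward path above keeps reporting errors. The mapping itself is one line; a sketch with an assumed apply-result parameter:

/* ret: status from pcm_lib_apply_appl_ptr(); frames: requested rewind. */
static long rewind_result(int ret, unsigned long frames)
{
	/* Report failure as "0 frames rewound"; callers that treat any
	 * negative return as fatal keep streaming. */
	return ret < 0 ? 0 : (long)frames;
}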
index b3b353d7252724e10f23a9288cd24aab3ef34007..f055ca10bbc1d33c9c1cee1fd913b7c930984ac1 100644 (file)
@@ -579,15 +579,14 @@ static int snd_rawmidi_info_user(struct snd_rawmidi_substream *substream,
        return 0;
 }
 
-int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+static int __snd_rawmidi_info_select(struct snd_card *card,
+                                    struct snd_rawmidi_info *info)
 {
        struct snd_rawmidi *rmidi;
        struct snd_rawmidi_str *pstr;
        struct snd_rawmidi_substream *substream;
 
-       mutex_lock(&register_mutex);
        rmidi = snd_rawmidi_search(card, info->device);
-       mutex_unlock(&register_mutex);
        if (!rmidi)
                return -ENXIO;
        if (info->stream < 0 || info->stream > 1)
@@ -603,6 +602,16 @@ int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info
        }
        return -ENXIO;
 }
+
+int snd_rawmidi_info_select(struct snd_card *card, struct snd_rawmidi_info *info)
+{
+       int ret;
+
+       mutex_lock(&register_mutex);
+       ret = __snd_rawmidi_info_select(card, info);
+       mutex_unlock(&register_mutex);
+       return ret;
+}
 EXPORT_SYMBOL(snd_rawmidi_info_select);
 
 static int snd_rawmidi_info_select_user(struct snd_card *card,
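
The race here was that register_mutex covered only the snd_rawmidi_search() lookup, so the found device could be freed before it was dereferenced. The fix is the common locked-wrapper refactor: a double-underscore helper holds the logic, and a thin exported wrapper owns the mutex across lookup and use. Generic shape (kernel-style sketch):

#include <linux/mutex.h>

static DEFINE_MUTEX(registry_lock);

static int __lookup_and_use(int id)
{
	/* registry search + dereference, both under registry_lock */
	return 0;
}

int lookup_and_use(int id)
{
	int ret;

	mutex_lock(&registry_lock);
	ret = __lookup_and_use(id);
	mutex_unlock(&registry_lock);
	return ret;
}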
index afac886ffa28a7c7cef39bb3c4f5d1ba816e890a..0333143a1fa7ad646c4da4b25eb653b2d3a1a3fb 100644 (file)
@@ -39,6 +39,7 @@
 #include <sound/core.h>
 #include <sound/control.h>
 #include <sound/pcm.h>
+#include <sound/pcm_params.h>
 #include <sound/info.h>
 #include <sound/initval.h>
 
@@ -305,19 +306,6 @@ static int loopback_trigger(struct snd_pcm_substream *substream, int cmd)
        return 0;
 }
 
-static void params_change_substream(struct loopback_pcm *dpcm,
-                                   struct snd_pcm_runtime *runtime)
-{
-       struct snd_pcm_runtime *dst_runtime;
-
-       if (dpcm == NULL || dpcm->substream == NULL)
-               return;
-       dst_runtime = dpcm->substream->runtime;
-       if (dst_runtime == NULL)
-               return;
-       dst_runtime->hw = dpcm->cable->hw;
-}
-
 static void params_change(struct snd_pcm_substream *substream)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
@@ -329,10 +317,6 @@ static void params_change(struct snd_pcm_substream *substream)
        cable->hw.rate_max = runtime->rate;
        cable->hw.channels_min = runtime->channels;
        cable->hw.channels_max = runtime->channels;
-       params_change_substream(cable->streams[SNDRV_PCM_STREAM_PLAYBACK],
-                               runtime);
-       params_change_substream(cable->streams[SNDRV_PCM_STREAM_CAPTURE],
-                               runtime);
 }
 
 static int loopback_prepare(struct snd_pcm_substream *substream)
@@ -620,26 +604,29 @@ static unsigned int get_cable_index(struct snd_pcm_substream *substream)
 static int rule_format(struct snd_pcm_hw_params *params,
                       struct snd_pcm_hw_rule *rule)
 {
+       struct loopback_pcm *dpcm = rule->private;
+       struct loopback_cable *cable = dpcm->cable;
+       struct snd_mask m;
 
-       struct snd_pcm_hardware *hw = rule->private;
-       struct snd_mask *maskp = hw_param_mask(params, rule->var);
-
-       maskp->bits[0] &= (u_int32_t)hw->formats;
-       maskp->bits[1] &= (u_int32_t)(hw->formats >> 32);
-       memset(maskp->bits + 2, 0, (SNDRV_MASK_MAX-64) / 8); /* clear rest */
-       if (! maskp->bits[0] && ! maskp->bits[1])
-               return -EINVAL;
-       return 0;
+       snd_mask_none(&m);
+       mutex_lock(&dpcm->loopback->cable_lock);
+       m.bits[0] = (u_int32_t)cable->hw.formats;
+       m.bits[1] = (u_int32_t)(cable->hw.formats >> 32);
+       mutex_unlock(&dpcm->loopback->cable_lock);
+       return snd_mask_refine(hw_param_mask(params, rule->var), &m);
 }
 
 static int rule_rate(struct snd_pcm_hw_params *params,
                     struct snd_pcm_hw_rule *rule)
 {
-       struct snd_pcm_hardware *hw = rule->private;
+       struct loopback_pcm *dpcm = rule->private;
+       struct loopback_cable *cable = dpcm->cable;
        struct snd_interval t;
 
-        t.min = hw->rate_min;
-        t.max = hw->rate_max;
+       mutex_lock(&dpcm->loopback->cable_lock);
+       t.min = cable->hw.rate_min;
+       t.max = cable->hw.rate_max;
+       mutex_unlock(&dpcm->loopback->cable_lock);
         t.openmin = t.openmax = 0;
         t.integer = 0;
        return snd_interval_refine(hw_param_interval(params, rule->var), &t);
@@ -648,22 +635,44 @@ static int rule_rate(struct snd_pcm_hw_params *params,
 static int rule_channels(struct snd_pcm_hw_params *params,
                         struct snd_pcm_hw_rule *rule)
 {
-       struct snd_pcm_hardware *hw = rule->private;
+       struct loopback_pcm *dpcm = rule->private;
+       struct loopback_cable *cable = dpcm->cable;
        struct snd_interval t;
 
-        t.min = hw->channels_min;
-        t.max = hw->channels_max;
+       mutex_lock(&dpcm->loopback->cable_lock);
+       t.min = cable->hw.channels_min;
+       t.max = cable->hw.channels_max;
+       mutex_unlock(&dpcm->loopback->cable_lock);
         t.openmin = t.openmax = 0;
         t.integer = 0;
        return snd_interval_refine(hw_param_interval(params, rule->var), &t);
 }
 
+static void free_cable(struct snd_pcm_substream *substream)
+{
+       struct loopback *loopback = substream->private_data;
+       int dev = get_cable_index(substream);
+       struct loopback_cable *cable;
+
+       cable = loopback->cables[substream->number][dev];
+       if (!cable)
+               return;
+       if (cable->streams[!substream->stream]) {
+               /* other stream is still alive */
+               cable->streams[substream->stream] = NULL;
+       } else {
+               /* free the cable */
+               loopback->cables[substream->number][dev] = NULL;
+               kfree(cable);
+       }
+}
+
 static int loopback_open(struct snd_pcm_substream *substream)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct loopback *loopback = substream->private_data;
        struct loopback_pcm *dpcm;
-       struct loopback_cable *cable;
+       struct loopback_cable *cable = NULL;
        int err = 0;
        int dev = get_cable_index(substream);
 
@@ -681,7 +690,6 @@ static int loopback_open(struct snd_pcm_substream *substream)
        if (!cable) {
                cable = kzalloc(sizeof(*cable), GFP_KERNEL);
                if (!cable) {
-                       kfree(dpcm);
                        err = -ENOMEM;
                        goto unlock;
                }
@@ -699,19 +707,19 @@ static int loopback_open(struct snd_pcm_substream *substream)
        /* are cached -> they do not reflect the actual state */
        err = snd_pcm_hw_rule_add(runtime, 0,
                                  SNDRV_PCM_HW_PARAM_FORMAT,
-                                 rule_format, &runtime->hw,
+                                 rule_format, dpcm,
                                  SNDRV_PCM_HW_PARAM_FORMAT, -1);
        if (err < 0)
                goto unlock;
        err = snd_pcm_hw_rule_add(runtime, 0,
                                  SNDRV_PCM_HW_PARAM_RATE,
-                                 rule_rate, &runtime->hw,
+                                 rule_rate, dpcm,
                                  SNDRV_PCM_HW_PARAM_RATE, -1);
        if (err < 0)
                goto unlock;
        err = snd_pcm_hw_rule_add(runtime, 0,
                                  SNDRV_PCM_HW_PARAM_CHANNELS,
-                                 rule_channels, &runtime->hw,
+                                 rule_channels, dpcm,
                                  SNDRV_PCM_HW_PARAM_CHANNELS, -1);
        if (err < 0)
                goto unlock;
@@ -723,6 +731,10 @@ static int loopback_open(struct snd_pcm_substream *substream)
        else
                runtime->hw = cable->hw;
  unlock:
+       if (err < 0) {
+               free_cable(substream);
+               kfree(dpcm);
+       }
        mutex_unlock(&loopback->cable_lock);
        return err;
 }
@@ -731,20 +743,10 @@ static int loopback_close(struct snd_pcm_substream *substream)
 {
        struct loopback *loopback = substream->private_data;
        struct loopback_pcm *dpcm = substream->runtime->private_data;
-       struct loopback_cable *cable;
-       int dev = get_cable_index(substream);
 
        loopback_timer_stop(dpcm);
        mutex_lock(&loopback->cable_lock);
-       cable = loopback->cables[substream->number][dev];
-       if (cable->streams[!substream->stream]) {
-               /* other stream is still alive */
-               cable->streams[substream->stream] = NULL;
-       } else {
-               /* free the cable */
-               loopback->cables[substream->number][dev] = NULL;
-               kfree(cable);
-       }
+       free_cable(substream);
        mutex_unlock(&loopback->cable_lock);
        return 0;
 }
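
snd-aloop's open path used to leak dpcm and leave a half-set-up cable registered when a later step such as snd_pcm_hw_rule_add() failed; extracting free_cable() gives loopback_close() and the open error path one shared, correct release sequence. Its ownership rule, reduced to a standalone sketch:

#include <stdlib.h>

/* A cable is shared by up to two streams; the last one out frees it. */
struct cable { void *streams[2]; };

static void put_stream(struct cable **slot, int dir)
{
	struct cable *c = *slot;

	if (!c)				/* never attached, nothing to do */
		return;
	if (c->streams[!dir]) {
		c->streams[dir] = NULL;	/* peer still alive: just detach */
	} else {
		*slot = NULL;		/* last user: clear the registry slot */
		free(c);
	}
}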
index 038a180d3f8117a7455ca6914ed883173aed8863..cbe818eda3363c0f0595c3496e980f64343776d6 100644 (file)
@@ -325,7 +325,7 @@ static int hdac_component_master_match(struct device *dev, void *data)
  */
 int snd_hdac_i915_register_notifier(const struct i915_audio_component_audio_ops *aops)
 {
-       if (WARN_ON(!hdac_acomp))
+       if (!hdac_acomp)
                return -ENODEV;
 
        hdac_acomp->audio_ops = aops;
index a81aacf684b26341ec9257366d7c83a47962a16d..37e1cf8218ff0f864de4635d0188ed5b9b91d73c 100644 (file)
@@ -271,6 +271,8 @@ enum {
        CXT_FIXUP_HP_SPECTRE,
        CXT_FIXUP_HP_GATE_MIC,
        CXT_FIXUP_MUTE_LED_GPIO,
+       CXT_FIXUP_HEADSET_MIC,
+       CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
 
 /* for hda_fixup_thinkpad_acpi() */
@@ -350,6 +352,18 @@ static void cxt_fixup_headphone_mic(struct hda_codec *codec,
        }
 }
 
+static void cxt_fixup_headset_mic(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct conexant_spec *spec = codec->spec;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
+               break;
+       }
+}
+
 /* OPLC XO 1.5 fixup */
 
 /* OLPC XO-1.5 supports DC input mode (e.g. for use with analog sensors)
@@ -880,6 +894,19 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_mute_led_gpio,
        },
+       [CXT_FIXUP_HEADSET_MIC] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cxt_fixup_headset_mic,
+       },
+       [CXT_FIXUP_HP_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1a, 0x02a1113c },
+                       { }
+               },
+               .chained = true,
+               .chain_id = CXT_FIXUP_HEADSET_MIC,
+       },
 };
 
 static const struct snd_pci_quirk cxt5045_fixups[] = {
@@ -934,6 +961,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x8115, "HP Z1 Gen3", CXT_FIXUP_HP_GATE_MIC),
        SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO),
        SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO),
+       SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN),
        SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo T400", CXT_PINCFG_LENOVO_TP410),
index c19c81d230bd7423b4153d2266a45e09333f8714..b4f1b6e88305496f91d028ceb82fe9b8a6a60ccb 100644 (file)
@@ -55,10 +55,11 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_kabylake(codec) ((codec)->core.vendor_id == 0x8086280b)
 #define is_geminilake(codec) (((codec)->core.vendor_id == 0x8086280d) || \
                                ((codec)->core.vendor_id == 0x80862800))
+#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
                                || is_skylake(codec) || is_broxton(codec) \
-                               || is_kabylake(codec)) || is_geminilake(codec)
-
+                               || is_kabylake(codec)) || is_geminilake(codec) \
+                               || is_cannonlake(codec)
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
 #define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -3841,6 +3842,7 @@ HDA_CODEC_ENTRY(0x80862808, "Broadwell HDMI",     patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x80862809, "Skylake HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280a, "Broxton HDMI",    patch_i915_hsw_hdmi),
 HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI",   patch_i915_hsw_hdmi),
+HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862800, "Geminilake HDMI", patch_i915_glk_hdmi),
 HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
index 4b21f71d685c78fd00345b2e229541b493c614f0..8fd2d9c62c96ce53a78dced9d0cd3529961978a9 100644 (file)
@@ -324,8 +324,12 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0292:
                alc_update_coef_idx(codec, 0x4, 1<<15, 0);
                break;
-       case 0x10ec0215:
        case 0x10ec0225:
+       case 0x10ec0295:
+       case 0x10ec0299:
+               alc_update_coef_idx(codec, 0x67, 0xf000, 0x3000);
+               /* fallthrough */
+       case 0x10ec0215:
        case 0x10ec0233:
        case 0x10ec0236:
        case 0x10ec0255:
@@ -336,10 +340,8 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
        case 0x10ec0286:
        case 0x10ec0288:
        case 0x10ec0285:
-       case 0x10ec0295:
        case 0x10ec0298:
        case 0x10ec0289:
-       case 0x10ec0299:
                alc_update_coef_idx(codec, 0x10, 1<<9, 0);
                break;
        case 0x10ec0275:
@@ -5185,6 +5187,22 @@ static void alc233_alc662_fixup_lenovo_dual_codecs(struct hda_codec *codec,
        }
 }
 
+/* Forcibly assign NID 0x03 to HP/LO while NID 0x02 to SPK for EQ */
+static void alc274_fixup_bind_dacs(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       static hda_nid_t preferred_pairs[] = {
+               0x21, 0x03, 0x1b, 0x03, 0x16, 0x02,
+               0
+       };
+
+       if (action != HDA_FIXUP_ACT_PRE_PROBE)
+               return;
+
+       spec->gen.preferred_dacs = preferred_pairs;
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5302,6 +5320,8 @@ enum {
        ALC233_FIXUP_LENOVO_MULTI_CODECS,
        ALC294_FIXUP_LENOVO_MIC_LOCATION,
        ALC700_FIXUP_INTEL_REFERENCE,
+       ALC274_FIXUP_DELL_BIND_DACS,
+       ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6112,6 +6132,21 @@ static const struct hda_fixup alc269_fixups[] = {
                        {}
                }
        },
+       [ALC274_FIXUP_DELL_BIND_DACS] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc274_fixup_bind_dacs,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+       },
+       [ALC274_FIXUP_DELL_AIO_LINEOUT_VERB] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x1b, 0x0401102f },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC274_FIXUP_DELL_BIND_DACS
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -6295,6 +6330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+       SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
        SND_PCI_QUIRK(0x17aa, 0x3112, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
@@ -6552,6 +6588,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x1b, 0x01011020},
                {0x21, 0x02211010}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60130},
+               {0x14, 0x90170110},
+               {0x1b, 0x01011020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x12, 0x90a60160},
                {0x14, 0x90170120},
@@ -6578,7 +6619,7 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x14, 0x90170110},
                {0x1b, 0x90a70130},
                {0x21, 0x03211020}),
-       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+       SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
                {0x12, 0xb7a60130},
                {0x13, 0xb8a61140},
                {0x16, 0x90170110},
index 9f521a55d610186ae78d433909edd26c8dc7035f..b5e41df6bb3a895f10a90756fab0e54954a685d7 100644 (file)
@@ -1051,6 +1051,11 @@ static int acp_audio_probe(struct platform_device *pdev)
        struct resource *res;
        const u32 *pdata = pdev->dev.platform_data;
 
+       if (!pdata) {
+               dev_err(&pdev->dev, "Missing platform data\n");
+               return -ENODEV;
+       }
+
        audio_drv_data = devm_kzalloc(&pdev->dev, sizeof(struct audio_drv_data),
                                        GFP_KERNEL);
        if (audio_drv_data == NULL)
@@ -1058,6 +1063,8 @@ static int acp_audio_probe(struct platform_device *pdev)
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        audio_drv_data->acp_mmio = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(audio_drv_data->acp_mmio))
+               return PTR_ERR(audio_drv_data->acp_mmio);
 
        /* The following members get populated in device 'open'
         * function. Till then interrupts are disabled in 'acp_init'
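
Two probe-order fixes above: reject a missing platform_data before it is dereferenced, and test the devm_ioremap_resource() result with IS_ERR() before the mapping is used. The canonical platform-probe shape (kernel-style sketch; names are illustrative):

#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/err.h>

static int example_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *mmio;

	if (!pdev->dev.platform_data) {		/* fail fast, before use */
		dev_err(&pdev->dev, "Missing platform data\n");
		return -ENODEV;
	}

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	mmio = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(mmio))			/* also catches res == NULL */
		return PTR_ERR(mmio);

	/* ... mmio is now safe to use ... */
	return 0;
}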
index 4a56f3dfba5132a01bbb49eb985540d16a944ecf..dcee145dd17922ac2f81b2fb92a4085d3754e78d 100644 (file)
@@ -64,7 +64,7 @@ config SND_AT91_SOC_SAM9X5_WM8731
 config SND_ATMEL_SOC_CLASSD
        tristate "Atmel ASoC driver for boards using CLASSD"
        depends on ARCH_AT91 || COMPILE_TEST
-       select SND_ATMEL_SOC_DMA
+       select SND_SOC_GENERIC_DMAENGINE_PCM
        select REGMAP_MMIO
        help
          Say Y if you want to add support for Atmel ASoC driver for boards using
index b2d42ec1dcd9f7f75e98c04dd35c06351bf3ca7b..56564ce90cb6b0cb156f08ae39cc46a828f32594 100644 (file)
@@ -2520,7 +2520,7 @@ static struct da7218_pdata *da7218_of_to_pdata(struct snd_soc_codec *codec)
        }
 
        if (da7218->dev_id == DA7218_DEV_ID) {
-               hpldet_np = of_find_node_by_name(np, "da7218_hpldet");
+               hpldet_np = of_get_child_by_name(np, "da7218_hpldet");
                if (!hpldet_np)
                        return pdata;
 
index 5f3c42c4f74ad7d12d5c697c56e028c8329c48cb..066ea2f4ce7b02a8f2cc58c2920b2ef7b6f2b160 100644 (file)
 #define MSM8916_WCD_ANALOG_RATES (SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000 |\
                        SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_ANALOG_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                   SNDRV_PCM_FMTBIT_S24_LE)
+                                   SNDRV_PCM_FMTBIT_S32_LE)
 
 static int btn_mask = SND_JACK_BTN_0 | SND_JACK_BTN_1 |
               SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_BTN_4;
index a10a724eb448f4d06bffb034fbc86c68ccecb94d..13354d6304a848759433122e124d83119321bd71 100644 (file)
                                   SNDRV_PCM_RATE_32000 | \
                                   SNDRV_PCM_RATE_48000)
 #define MSM8916_WCD_DIGITAL_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\
-                                    SNDRV_PCM_FMTBIT_S24_LE)
+                                    SNDRV_PCM_FMTBIT_S32_LE)
 
 struct msm8916_wcd_digital_priv {
        struct clk *ahbclk, *mclk;
@@ -645,7 +645,7 @@ static int msm8916_wcd_digital_hw_params(struct snd_pcm_substream *substream,
                                    RX_I2S_CTL_RX_I2S_MODE_MASK,
                                    RX_I2S_CTL_RX_I2S_MODE_16);
                break;
-       case SNDRV_PCM_FORMAT_S24_LE:
+       case SNDRV_PCM_FORMAT_S32_LE:
                snd_soc_update_bits(dai->codec, LPASS_CDC_CLK_TX_I2S_CTL,
                                    TX_I2S_CTL_TX_I2S_MODE_MASK,
                                    TX_I2S_CTL_TX_I2S_MODE_32);
index 714ce17da717c0edbc8817aa482dc1f2ec6fbcbe..e853a6dfd33b0ee7b8bf700dffea8b1e477f2d09 100644 (file)
@@ -905,6 +905,7 @@ static int nau8825_adc_event(struct snd_soc_dapm_widget *w,
 
        switch (event) {
        case SND_SOC_DAPM_POST_PMU:
+               msleep(125);
                regmap_update_bits(nau8825->regmap, NAU8825_REG_ENA_CTRL,
                        NAU8825_ENABLE_ADC, NAU8825_ENABLE_ADC);
                break;
index 2df91db765acd6300406aa330cb0d85b3edb7d96..64bf26cec20d535314551ae542f8c28ed25352dc 100644 (file)
@@ -289,6 +289,8 @@ static int rt5514_spi_pcm_probe(struct snd_soc_platform *platform)
                        dev_err(&rt5514_spi->dev,
                                "%s Failed to reguest IRQ: %d\n", __func__,
                                ret);
+               else
+                       device_init_wakeup(rt5514_dsp->dev, true);
        }
 
        return 0;
@@ -456,8 +458,6 @@ static int rt5514_spi_probe(struct spi_device *spi)
                return ret;
        }
 
-       device_init_wakeup(&spi->dev, true);
-
        return 0;
 }
 
@@ -482,10 +482,13 @@ static int __maybe_unused rt5514_resume(struct device *dev)
        if (device_may_wakeup(dev))
                disable_irq_wake(irq);
 
-       if (rt5514_dsp->substream) {
-               rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf, sizeof(buf));
-               if (buf[0] & RT5514_IRQ_STATUS_BIT)
-                       rt5514_schedule_copy(rt5514_dsp);
+       if (rt5514_dsp) {
+               if (rt5514_dsp->substream) {
+                       rt5514_spi_burst_read(RT5514_IRQ_CTRL, (u8 *)&buf,
+                               sizeof(buf));
+                       if (buf[0] & RT5514_IRQ_STATUS_BIT)
+                               rt5514_schedule_copy(rt5514_dsp);
+               }
        }
 
        return 0;
index 2a5b5d74e69714eeb966c6fc793021fcb7cd3a13..2dd6e9f990a4c4c7a2377c5f159987a046220b7a 100644 (file)
@@ -496,7 +496,7 @@ static const struct snd_soc_dapm_widget rt5514_dapm_widgets[] = {
        SND_SOC_DAPM_PGA("DMIC1", SND_SOC_NOPM, 0, 0, NULL, 0),
        SND_SOC_DAPM_PGA("DMIC2", SND_SOC_NOPM, 0, 0, NULL, 0),
 
-       SND_SOC_DAPM_SUPPLY("DMIC CLK", SND_SOC_NOPM, 0, 0,
+       SND_SOC_DAPM_SUPPLY_S("DMIC CLK", 1, SND_SOC_NOPM, 0, 0,
                rt5514_set_dmic_clk, SND_SOC_DAPM_PRE_PMU),
 
        SND_SOC_DAPM_SUPPLY("ADC CLK", RT5514_CLK_CTRL1,
index f020d2d1eef4a98baa8c91442485ee64257c708e..edc152c8a1fe7596e9bbc5760574d973a9185d16 100644 (file)
@@ -3823,6 +3823,8 @@ static int rt5645_i2c_probe(struct i2c_client *i2c,
        regmap_read(regmap, RT5645_VENDOR_ID, &val);
        rt5645->v_id = val & 0xff;
 
+       regmap_write(rt5645->regmap, RT5645_AD_DA_MIXER, 0x8080);
+
        ret = regmap_register_patch(rt5645->regmap, init_list,
                                    ARRAY_SIZE(init_list));
        if (ret != 0)
index b036c9dc0c8cf1883dd550a6755ae61c7a81e87d..d329bf719d80f01e5dc67865e4258f0cca50d6d5 100644 (file)
@@ -1560,6 +1560,10 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
                        RT5663_IRQ_POW_SAV_MASK, RT5663_IRQ_POW_SAV_EN);
                snd_soc_update_bits(codec, RT5663_IRQ_1,
                        RT5663_EN_IRQ_JD1_MASK, RT5663_EN_IRQ_JD1_EN);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_RST);
+               snd_soc_update_bits(codec, RT5663_EM_JACK_TYPE_1,
+                       RT5663_EM_JD_MASK, RT5663_EM_JD_NOR);
 
                while (true) {
                        regmap_read(rt5663->regmap, RT5663_INT_ST_2, &val);
index c5a9b69579ad216d9b039359910e8c10acfa1832..03adc8004ba98d73da3f132d23285fdae77bf72d 100644 (file)
 #define RT5663_POL_EXT_JD_SHIFT                        10
 #define RT5663_POL_EXT_JD_EN                   (0x1 << 10)
 #define RT5663_POL_EXT_JD_DIS                  (0x0 << 10)
+#define RT5663_EM_JD_MASK                      (0x1 << 7)
+#define RT5663_EM_JD_SHIFT                     7
+#define RT5663_EM_JD_NOR                       (0x1 << 7)
+#define RT5663_EM_JD_RST                       (0x0 << 7)
 
 /* DACREF LDO Control (0x0112)*/
 #define RT5663_PWR_LDO_DACREFL_MASK            (0x1 << 9)
index 730fb2058869978b3f1b5281b123844a44458c42..1ff3edb7bbb6b28eb45c99b7b14baeadb46651ec 100644 (file)
@@ -116,7 +116,7 @@ struct aic31xx_pdata {
 /* INT2 interrupt control */
 #define AIC31XX_INT2CTRL       AIC31XX_REG(0, 49)
 /* GPIO1 control */
-#define AIC31XX_GPIO1          AIC31XX_REG(0, 50)
+#define AIC31XX_GPIO1          AIC31XX_REG(0, 51)
 
 #define AIC31XX_DACPRB         AIC31XX_REG(0, 60)
 /* ADC Instruction Set Register */
index c482b2e7a7d2a55d7ead20e56a680da6c0791378..cfe72b9d4356069327e80d592ff1e660bca39258 100644 (file)
@@ -232,7 +232,7 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
        struct twl4030_codec_data *pdata = dev_get_platdata(codec->dev);
        struct device_node *twl4030_codec_node = NULL;
 
-       twl4030_codec_node = of_find_node_by_name(codec->dev->parent->of_node,
+       twl4030_codec_node = of_get_child_by_name(codec->dev->parent->of_node,
                                                  "codec");
 
        if (!pdata && twl4030_codec_node) {
@@ -241,9 +241,11 @@ static struct twl4030_codec_data *twl4030_get_pdata(struct snd_soc_codec *codec)
                                     GFP_KERNEL);
                if (!pdata) {
                        dev_err(codec->dev, "Can not allocate memory\n");
+                       of_node_put(twl4030_codec_node);
                        return NULL;
                }
                twl4030_setup_pdata_of(pdata, twl4030_codec_node);
+               of_node_put(twl4030_codec_node);
        }
 
        return pdata;
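
of_find_node_by_name() searches the whole tree below (and drops a reference on) its starting node, so it can match a "codec" node belonging to a different parent; of_get_child_by_name() restricts the match to direct children, and the node it returns must be balanced with of_node_put() on every exit path, which the hunk above also adds to the allocation-failure branch. The pattern (kernel-style sketch):

#include <linux/of.h>
#include <linux/errno.h>

static int read_codec_config(struct device_node *parent)
{
	struct device_node *codec;

	codec = of_get_child_by_name(parent, "codec");	/* direct child only */
	if (!codec)
		return -ENODEV;

	/* ... parse properties of the codec node here ... */

	of_node_put(codec);	/* balance the reference on every path */
	return 0;
}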
index 65c059b5ffd784066fd6d66ae74dae044b721eeb..66e32f5d2917f2f0b958c124b5f4a0eeca296c10 100644 (file)
@@ -1733,7 +1733,7 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                 le64_to_cpu(footer->timestamp));
 
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*region)) {
+              sizeof(*region) < firmware->size - pos) {
                region = (void *)&(firmware->data[pos]);
                region_name = "Unknown";
                reg = 0;
@@ -1782,8 +1782,8 @@ static int wm_adsp_load(struct wm_adsp *dsp)
                         regions, le32_to_cpu(region->len), offset,
                         region_name);
 
-               if ((pos + le32_to_cpu(region->len) + sizeof(*region)) >
-                   firmware->size) {
+               if (le32_to_cpu(region->len) >
+                   firmware->size - pos - sizeof(*region)) {
                        adsp_err(dsp,
                                 "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                 file, regions, region_name,
@@ -2253,7 +2253,7 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
 
        blocks = 0;
        while (pos < firmware->size &&
-              pos - firmware->size > sizeof(*blk)) {
+              sizeof(*blk) < firmware->size - pos) {
                blk = (void *)(&firmware->data[pos]);
 
                type = le16_to_cpu(blk->type);
@@ -2327,8 +2327,8 @@ static int wm_adsp_load_coeff(struct wm_adsp *dsp)
                }
 
                if (reg) {
-                       if ((pos + le32_to_cpu(blk->len) + sizeof(*blk)) >
-                           firmware->size) {
+                       if (le32_to_cpu(blk->len) >
+                           firmware->size - pos - sizeof(*blk)) {
                                adsp_err(dsp,
                                         "%s.%d: %s region len %d bytes exceeds file length %zu\n",
                                         file, blocks, region_name,
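
The old guards were wrong twice over: "pos - firmware->size > sizeof(*region)" wraps because size_t is unsigned whenever pos < size, and the additive form "pos + len + sizeof(*region) > size" can overflow before the comparison. Rewriting both as subtractions from the known-larger side keeps every intermediate value in range. A standalone rendering of the safe form:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Is there a complete header at offset pos in a buffer of length size? */
static bool header_fits(size_t size, size_t pos, size_t hdr_len)
{
	return pos < size && hdr_len < size - pos;	/* no wraparound */
}

/* Does the payload declared by that header also fit? */
static bool payload_fits(size_t size, size_t pos, size_t hdr_len,
			 uint32_t payload_len)
{
	/* Assumes header_fits() already held; all terms stay in range. */
	return payload_len <= size - pos - hdr_len;
}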
index 0f163abe4ba37d93bec022afd0842b20423f2361..52c27a358933b100d85b2392abe2d2fc4501dd3a 100644 (file)
 #define ASRFSTi_OUTPUT_FIFO_SHIFT      12
 #define ASRFSTi_OUTPUT_FIFO_MASK       (((1 << ASRFSTi_OUTPUT_FIFO_WIDTH) - 1) << ASRFSTi_OUTPUT_FIFO_SHIFT)
 #define ASRFSTi_IAEi_SHIFT             11
-#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_OAFi_SHIFT)
-#define ASRFSTi_IAEi                   (1 << ASRFSTi_OAFi_SHIFT)
+#define ASRFSTi_IAEi_MASK              (1 << ASRFSTi_IAEi_SHIFT)
+#define ASRFSTi_IAEi                   (1 << ASRFSTi_IAEi_SHIFT)
 #define ASRFSTi_INPUT_FIFO_WIDTH       7
 #define ASRFSTi_INPUT_FIFO_SHIFT       0
 #define ASRFSTi_INPUT_FIFO_MASK                ((1 << ASRFSTi_INPUT_FIFO_WIDTH) - 1)
index f2f51e06e22cc8b6ff3f9e4f21046be256bd1265..424bafaf51efe63e108fcd83ce3c6bda43f9e92c 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/delay.h>
+#include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/of.h>
@@ -265,6 +266,8 @@ struct fsl_ssi_private {
 
        u32 fifo_watermark;
        u32 dma_maxburst;
+
+       struct mutex ac97_reg_lock;
 };
 
 /*
@@ -1260,11 +1263,13 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        if (reg > 0x7f)
                return;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 write clk_prepare_enable failed: %d\n",
                        ret);
-               return;
+               goto ret_unlock;
        }
 
        lreg = reg <<  12;
@@ -1278,6 +1283,9 @@ static void fsl_ssi_ac97_write(struct snd_ac97 *ac97, unsigned short reg,
        udelay(100);
 
        clk_disable_unprepare(fsl_ac97_data->clk);
+
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
 }
 
 static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
@@ -1285,16 +1293,18 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 {
        struct regmap *regs = fsl_ac97_data->regs;
 
-       unsigned short val = -1;
+       unsigned short val = 0;
        u32 reg_val;
        unsigned int lreg;
        int ret;
 
+       mutex_lock(&fsl_ac97_data->ac97_reg_lock);
+
        ret = clk_prepare_enable(fsl_ac97_data->clk);
        if (ret) {
                pr_err("ac97 read clk_prepare_enable failed: %d\n",
                        ret);
-               return -1;
+               goto ret_unlock;
        }
 
        lreg = (reg & 0x7f) <<  12;
@@ -1309,6 +1319,8 @@ static unsigned short fsl_ssi_ac97_read(struct snd_ac97 *ac97,
 
        clk_disable_unprepare(fsl_ac97_data->clk);
 
+ret_unlock:
+       mutex_unlock(&fsl_ac97_data->ac97_reg_lock);
        return val;
 }
 
@@ -1458,12 +1470,6 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                                sizeof(fsl_ssi_ac97_dai));
 
                fsl_ac97_data = ssi_private;
-
-               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
-               if (ret) {
-                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
-                       return ret;
-               }
        } else {
                /* Initialize this copy of the CPU DAI driver structure */
                memcpy(&ssi_private->cpu_dai_drv, &fsl_ssi_dai_template,
@@ -1574,6 +1580,15 @@ static int fsl_ssi_probe(struct platform_device *pdev)
                        return ret;
        }
 
+       if (fsl_ssi_is_ac97(ssi_private)) {
+               mutex_init(&ssi_private->ac97_reg_lock);
+               ret = snd_soc_set_ac97_ops_of_reset(&fsl_ssi_ac97_ops, pdev);
+               if (ret) {
+                       dev_err(&pdev->dev, "could not set AC'97 ops\n");
+                       goto error_ac97_ops;
+               }
+       }
+
        ret = devm_snd_soc_register_component(&pdev->dev, &fsl_ssi_component,
                                              &ssi_private->cpu_dai_drv, 1);
        if (ret) {
@@ -1657,6 +1672,13 @@ error_sound_card:
        fsl_ssi_debugfs_remove(&ssi_private->dbg_stats);
 
 error_asoc_register:
+       if (fsl_ssi_is_ac97(ssi_private))
+               snd_soc_set_ac97_ops(NULL);
+
+error_ac97_ops:
+       if (fsl_ssi_is_ac97(ssi_private))
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
@@ -1675,8 +1697,10 @@ static int fsl_ssi_remove(struct platform_device *pdev)
        if (ssi_private->soc->imx)
                fsl_ssi_imx_clean(pdev, ssi_private);
 
-       if (fsl_ssi_is_ac97(ssi_private))
+       if (fsl_ssi_is_ac97(ssi_private)) {
                snd_soc_set_ac97_ops(NULL);
+               mutex_destroy(&ssi_private->ac97_reg_lock);
+       }
 
        return 0;
 }
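
AC'97 reads and writes share one command register and one clock on the SSI, so unserialized callers could interleave half-finished transactions; the new ac97_reg_lock brackets the whole register access, and the clk_prepare_enable() failure path now exits through an unlock label. Skeleton (kernel-style sketch):

#include <linux/mutex.h>
#include <linux/clk.h>

/* One AC'97 register transaction, fully serialized. */
static int ac97_reg_access(struct mutex *reg_lock, struct clk *clk)
{
	int ret;

	mutex_lock(reg_lock);		/* one transaction at a time */

	ret = clk_prepare_enable(clk);
	if (ret)
		goto out_unlock;	/* failure still releases the lock */

	/* ... write command register, delay, read/write data ... */

	clk_disable_unprepare(clk);

out_unlock:
	mutex_unlock(reg_lock);
	return ret;
}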
index 6f9a8bcf20f3ebb4407a2452eed8870a11b40c02..6dcad0a8a0d045969873d983fa8b89bbe1f91e96 100644 (file)
@@ -101,7 +101,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index 6072164f2d43db7c7f8f50000892f6e61e80d79c..271ae3c2c5354c5788aa9383772ebbe63b4e8d9a 100644 (file)
@@ -109,7 +109,7 @@ static const struct snd_soc_dapm_route kabylake_map[] = {
        { "ssp0 Tx", NULL, "spk_out" },
 
        { "AIF Playback", NULL, "ssp1 Tx" },
-       { "ssp1 Tx", NULL, "hs_out" },
+       { "ssp1 Tx", NULL, "codec1_out" },
 
        { "hs_in", NULL, "ssp1 Rx" },
        { "ssp1 Rx", NULL, "AIF Capture" },
index d14c50a602894c4ad8aa76f2259839552aff2f6a..3eaac41090ca7f8b07ed99511d58cbc1f5498c56 100644 (file)
@@ -119,11 +119,16 @@ static bool skl_check_ep_match(struct device *dev, struct nhlt_endpoint *epnt,
 
        if ((epnt->virtual_bus_id == instance_id) &&
                        (epnt->linktype == link_type) &&
-                       (epnt->direction == dirn) &&
-                       (epnt->device_type == dev_type))
-               return true;
-       else
-               return false;
+                       (epnt->direction == dirn)) {
+               /* do not check dev_type for DMIC link type */
+               if (epnt->linktype == NHLT_LINK_DMIC)
+                       return true;
+
+               if (epnt->device_type == dev_type)
+                       return true;
+       }
+
+       return false;
 }
 
 struct nhlt_specific_cfg
index a072bcf209d2aa4c9c72503466e027e6867cd9bb..81923da18ac2259ad159b8c0282242cc7c94c53c 100644 (file)
@@ -2908,7 +2908,7 @@ static int skl_tplg_control_load(struct snd_soc_component *cmpnt,
                break;
 
        default:
-               dev_warn(bus->dev, "Control load not supported %d:%d:%d\n",
+               dev_dbg(bus->dev, "Control load not supported %d:%d:%d\n",
                        hdr->ops.get, hdr->ops.put, hdr->ops.info);
                break;
        }
index ee5055d47d13d0ba6f63417947c63f731bcac4a8..a89fe9b6463ba6b56d88c9afa6ae3fdd1397e6ae 100644 (file)
@@ -322,26 +322,30 @@ static int rk_spdif_probe(struct platform_device *pdev)
        spdif->mclk = devm_clk_get(&pdev->dev, "mclk");
        if (IS_ERR(spdif->mclk)) {
                dev_err(&pdev->dev, "Can't retrieve rk_spdif master clock\n");
-               return PTR_ERR(spdif->mclk);
+               ret = PTR_ERR(spdif->mclk);
+               goto err_disable_hclk;
        }
 
        ret = clk_prepare_enable(spdif->mclk);
        if (ret) {
                dev_err(spdif->dev, "clock enable failed %d\n", ret);
-               return ret;
+               goto err_disable_clocks;
        }
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        regs = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(regs))
-               return PTR_ERR(regs);
+       if (IS_ERR(regs)) {
+               ret = PTR_ERR(regs);
+               goto err_disable_clocks;
+       }
 
        spdif->regmap = devm_regmap_init_mmio_clk(&pdev->dev, "hclk", regs,
                                                  &rk_spdif_regmap_config);
        if (IS_ERR(spdif->regmap)) {
                dev_err(&pdev->dev,
                        "Failed to initialise managed register map\n");
-               return PTR_ERR(spdif->regmap);
+               ret = PTR_ERR(spdif->regmap);
+               goto err_disable_clocks;
        }
 
        spdif->playback_dma_data.addr = res->start + SPDIF_SMPDR;
@@ -373,6 +377,10 @@ static int rk_spdif_probe(struct platform_device *pdev)
 
 err_pm_runtime:
        pm_runtime_disable(&pdev->dev);
+err_disable_clocks:
+       clk_disable_unprepare(spdif->mclk);
+err_disable_hclk:
+       clk_disable_unprepare(spdif->hclk);
 
        return ret;
 }
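
rk_spdif_probe() previously returned straight out of late failures with hclk (and then mclk) still running. The rewrite is the standard goto-unwind idiom: labels in reverse order of acquisition, each failure jumping to the deepest label matching what has been set up. Reduced to the two clocks (sketch; the late step is a stand-in):

#include <linux/clk.h>
#include <linux/errno.h>
#include <linux/types.h>

static int probe_clocks(struct clk *hclk, struct clk *mclk, bool fail_late)
{
	int ret;

	ret = clk_prepare_enable(hclk);
	if (ret)
		return ret;			/* nothing to unwind yet */

	ret = clk_prepare_enable(mclk);
	if (ret)
		goto err_disable_hclk;

	if (fail_late) {			/* stand-in for a later step */
		ret = -EINVAL;
		goto err_disable_clocks;
	}
	return 0;

err_disable_clocks:				/* reverse order of acquire */
	clk_disable_unprepare(mclk);
err_disable_hclk:
	clk_disable_unprepare(hclk);
	return ret;
}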
index 8ddb08714faabb5bc2b58716155b0ecec501db0d..4672688cac325ce79970c620c5f9e6d2d81b3525 100644 (file)
@@ -222,7 +222,7 @@ int rsnd_adg_set_cmd_timsel_gen2(struct rsnd_mod *cmd_mod,
                                   NULL, &val, NULL);
 
        val  = val      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        rsnd_mod_bset(adg_mod, CMDOUT_TIMSEL, mask, val);
 
@@ -250,7 +250,7 @@ int rsnd_adg_set_src_timesel_gen2(struct rsnd_mod *src_mod,
 
        in   = in       << shift;
        out  = out      << shift;
-       mask = 0xffff   << shift;
+       mask = 0x0f1f   << shift;
 
        switch (id / 2) {
        case 0:
@@ -380,7 +380,7 @@ int rsnd_adg_ssi_clk_try_start(struct rsnd_mod *ssi_mod, unsigned int rate)
                        ckr = 0x80000000;
        }
 
-       rsnd_mod_bset(adg_mod, BRGCKR, 0x80FF0000, adg->ckr | ckr);
+       rsnd_mod_bset(adg_mod, BRGCKR, 0x80770000, adg->ckr | ckr);
        rsnd_mod_write(adg_mod, BRRA,  adg->rbga);
        rsnd_mod_write(adg_mod, BRRB,  adg->rbgb);
 
index c70eb20978163d6952a6764381d083779dbd758b..f12a88a21dfa24d32f04dad32635dee2482a9337 100644 (file)
@@ -1332,8 +1332,8 @@ static int rsnd_pcm_new(struct snd_soc_pcm_runtime *rtd)
 
        return snd_pcm_lib_preallocate_pages_for_all(
                rtd->pcm,
-               SNDRV_DMA_TYPE_CONTINUOUS,
-               snd_dma_continuous_data(GFP_KERNEL),
+               SNDRV_DMA_TYPE_DEV,
+               rtd->card->snd_card->dev,
                PREALLOC_BUFFER, PREALLOC_BUFFER_MAX);
 }
 
index fd557abfe390a1ea6c6800e9e9836d039bdbcee3..4d750bdf8e2449981537a4b6d41bea33af321789 100644 (file)
 struct rsnd_dmaen {
        struct dma_chan         *chan;
        dma_cookie_t            cookie;
-       dma_addr_t              dma_buf;
        unsigned int            dma_len;
-       unsigned int            dma_period;
-       unsigned int            dma_cnt;
 };
 
 struct rsnd_dmapp {
@@ -71,38 +68,10 @@ static struct rsnd_mod mem = {
 /*
  *             Audio DMAC
  */
-#define rsnd_dmaen_sync(dmaen, io, i)  __rsnd_dmaen_sync(dmaen, io, i, 1)
-#define rsnd_dmaen_unsync(dmaen, io, i)        __rsnd_dmaen_sync(dmaen, io, i, 0)
-static void __rsnd_dmaen_sync(struct rsnd_dmaen *dmaen, struct rsnd_dai_stream *io,
-                             int i, int sync)
-{
-       struct device *dev = dmaen->chan->device->dev;
-       enum dma_data_direction dir;
-       int is_play = rsnd_io_is_play(io);
-       dma_addr_t buf;
-       int len, max;
-       size_t period;
-
-       len     = dmaen->dma_len;
-       period  = dmaen->dma_period;
-       max     = len / period;
-       i       = i % max;
-       buf     = dmaen->dma_buf + (period * i);
-
-       dir = is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-       if (sync)
-               dma_sync_single_for_device(dev, buf, period, dir);
-       else
-               dma_sync_single_for_cpu(dev, buf, period, dir);
-}
-
 static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
                                  struct rsnd_dai_stream *io)
 {
        struct rsnd_priv *priv = rsnd_mod_to_priv(mod);
-       struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
-       struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
        bool elapsed = false;
        unsigned long flags;
 
@@ -115,22 +84,9 @@ static void __rsnd_dmaen_complete(struct rsnd_mod *mod,
         */
        spin_lock_irqsave(&priv->lock, flags);
 
-       if (rsnd_io_is_working(io)) {
-               rsnd_dmaen_unsync(dmaen, io, dmaen->dma_cnt);
-
-               /*
-                * Next period is already started.
-                * Let's sync Next Next period
-                * see
-                *      rsnd_dmaen_start()
-                */
-               rsnd_dmaen_sync(dmaen, io, dmaen->dma_cnt + 2);
-
+       if (rsnd_io_is_working(io))
                elapsed = true;
 
-               dmaen->dma_cnt++;
-       }
-
        spin_unlock_irqrestore(&priv->lock, flags);
 
        if (elapsed)
@@ -165,14 +121,8 @@ static int rsnd_dmaen_stop(struct rsnd_mod *mod,
        struct rsnd_dma *dma = rsnd_mod_to_dma(mod);
        struct rsnd_dmaen *dmaen = rsnd_dma_to_dmaen(dma);
 
-       if (dmaen->chan) {
-               int is_play = rsnd_io_is_play(io);
-
+       if (dmaen->chan)
                dmaengine_terminate_all(dmaen->chan);
-               dma_unmap_single(dmaen->chan->device->dev,
-                                dmaen->dma_buf, dmaen->dma_len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       }
 
        return 0;
 }
@@ -237,11 +187,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        struct device *dev = rsnd_priv_to_dev(priv);
        struct dma_async_tx_descriptor *desc;
        struct dma_slave_config cfg = {};
-       dma_addr_t buf;
-       size_t len;
-       size_t period;
        int is_play = rsnd_io_is_play(io);
-       int i;
        int ret;
 
        cfg.direction   = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
@@ -258,19 +204,10 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        if (ret < 0)
                return ret;
 
-       len     = snd_pcm_lib_buffer_bytes(substream);
-       period  = snd_pcm_lib_period_bytes(substream);
-       buf     = dma_map_single(dmaen->chan->device->dev,
-                                substream->runtime->dma_area,
-                                len,
-                                is_play ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       if (dma_mapping_error(dmaen->chan->device->dev, buf)) {
-               dev_err(dev, "dma map failed\n");
-               return -EIO;
-       }
-
        desc = dmaengine_prep_dma_cyclic(dmaen->chan,
-                                        buf, len, period,
+                                        substream->runtime->dma_addr,
+                                        snd_pcm_lib_buffer_bytes(substream),
+                                        snd_pcm_lib_period_bytes(substream),
                                         is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
                                         DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 
@@ -282,18 +219,7 @@ static int rsnd_dmaen_start(struct rsnd_mod *mod,
        desc->callback          = rsnd_dmaen_complete;
        desc->callback_param    = rsnd_mod_get(dma);
 
-       dmaen->dma_buf          = buf;
-       dmaen->dma_len          = len;
-       dmaen->dma_period       = period;
-       dmaen->dma_cnt          = 0;
-
-       /*
-        * synchronize this and next period
-        * see
-        *      __rsnd_dmaen_complete()
-        */
-       for (i = 0; i < 2; i++)
-               rsnd_dmaen_sync(dmaen, io, i);
+       dmaen->dma_len          = snd_pcm_lib_buffer_bytes(substream);
 
        dmaen->cookie = dmaengine_submit(desc);
        if (dmaen->cookie < 0) {
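
All of the driver's hand-rolled dma_map_single()/dma_sync_single_*() bookkeeping becomes unnecessary once preallocation switches to SNDRV_DMA_TYPE_DEV (the core.c hunk earlier): the ALSA core then supplies a device-mapped buffer, and runtime->dma_addr can be handed directly to dmaengine_prep_dma_cyclic(). The surviving start path, approximately (kernel-style sketch):

#include <linux/dmaengine.h>
#include <sound/pcm.h>

/* Start cyclic DMA on the core-managed PCM buffer. */
static int start_cyclic(struct dma_chan *chan,
			struct snd_pcm_substream *substream, bool is_play)
{
	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_dma_cyclic(chan,
			substream->runtime->dma_addr,	/* device-visible */
			snd_pcm_lib_buffer_bytes(substream),
			snd_pcm_lib_period_bytes(substream),
			is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM,
			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
	if (!desc)
		return -EIO;

	/* period-elapsed callback would be hooked up here, then: */
	return dma_submit_error(dmaengine_submit(desc)) ? -EIO : 0;
}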
index fece1e5f582f35ab5e558a0b66b3b558816048d6..cbf3bf312d23bda9427747c1af2dde30d9fcf43d 100644 (file)
@@ -446,25 +446,29 @@ static bool rsnd_ssi_pointer_update(struct rsnd_mod *mod,
                                    int byte)
 {
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
+       bool ret = false;
+       int byte_pos;
 
-       ssi->byte_pos += byte;
+       byte_pos = ssi->byte_pos + byte;
 
-       if (ssi->byte_pos >= ssi->next_period_byte) {
+       if (byte_pos >= ssi->next_period_byte) {
                struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
                ssi->period_pos++;
                ssi->next_period_byte += ssi->byte_per_period;
 
                if (ssi->period_pos >= runtime->periods) {
-                       ssi->byte_pos = 0;
+                       byte_pos = 0;
                        ssi->period_pos = 0;
                        ssi->next_period_byte = ssi->byte_per_period;
                }
 
-               return true;
+               ret = true;
        }
 
-       return false;
+       WRITE_ONCE(ssi->byte_pos, byte_pos);
+
+       return ret;
 }
 
 /*
@@ -838,7 +842,7 @@ static int rsnd_ssi_pointer(struct rsnd_mod *mod,
        struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod);
        struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io);
 
-       *pointer = bytes_to_frames(runtime, ssi->byte_pos);
+       *pointer = bytes_to_frames(runtime, READ_ONCE(ssi->byte_pos));
 
        return 0;
 }
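
ssi->byte_pos is advanced from the interrupt path and read lock-free by the .pointer callback; in the old code the wrap-around reset was a second store, so a reader could observe the pre-reset value. Computing the new position in a local and publishing it with WRITE_ONCE(), matched by READ_ONCE() on the reader side, ensures only complete values are ever visible. The pattern:

#include <linux/compiler.h>

static int pos;	/* advanced in IRQ context, read lock-free elsewhere */

/* Writer: compute on a private copy, publish exactly once. */
static void advance(int bytes, int limit)
{
	int p = pos + bytes;	/* single writer, plain read is fine */

	if (p >= limit)
		p = 0;		/* wrap on the private copy, not in memory */
	WRITE_ONCE(pos, p);
}

/* Reader: one READ_ONCE prevents torn or refetched loads. */
static int current_pos(void)
{
	return READ_ONCE(pos);
}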
index 4d948757d300d04be3bfca3c63f78c68085a8506..6ff8a36c2c82224da8ae88c94b317092ea87f88f 100644 (file)
@@ -125,6 +125,7 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
 {
        int hdmi = rsnd_ssi_hdmi_port(io);
        int ret;
+       u32 mode = 0;
 
        ret = rsnd_ssiu_init(mod, io, priv);
        if (ret < 0)
@@ -136,9 +137,11 @@ static int rsnd_ssiu_init_gen2(struct rsnd_mod *mod,
                 * see
                 *      rsnd_ssi_config_init()
                 */
-               rsnd_mod_write(mod, SSI_MODE, 0x1);
+               mode = 0x1;
        }
 
+       rsnd_mod_write(mod, SSI_MODE, mode);
+
        if (rsnd_ssi_use_busif(io)) {
                rsnd_mod_write(mod, SSI_BUSIF_ADINR,
                               rsnd_get_adinr_bit(mod, io) |
index 7c9e361b2200be081aca8f5a99d1b71a5846d30b..2b4ceda36291c01c6cca69d3a1cacd6c23014f40 100644 (file)
@@ -2173,20 +2173,25 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid,
        kctl->private_value = (unsigned long)namelist;
        kctl->private_free = usb_mixer_selector_elem_free;
 
-       nameid = uac_selector_unit_iSelector(desc);
+       /* check the static mapping table first */
        len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
-       if (len)
-               ;
-       else if (nameid)
-               len = snd_usb_copy_string_desc(state, nameid, kctl->id.name,
-                                        sizeof(kctl->id.name));
-       else
-               len = get_term_name(state, &state->oterm,
-                                   kctl->id.name, sizeof(kctl->id.name), 0);
-
        if (!len) {
-               strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
+               /* no mapping? if iSelector is given, use it */
+               nameid = uac_selector_unit_iSelector(desc);
+               if (nameid)
+                       len = snd_usb_copy_string_desc(state, nameid,
+                                                      kctl->id.name,
+                                                      sizeof(kctl->id.name));
+               /* ... or try the terminal name next */
+               if (!len)
+                       len = get_term_name(state, &state->oterm,
+                                   kctl->id.name, sizeof(kctl->id.name), 0);
+               /* ... or use the fixed string "USB" as a last resort */
+               if (!len)
+                       strlcpy(kctl->id.name, "USB", sizeof(kctl->id.name));
 
+               /* and add the proper suffix */
                if (desc->bDescriptorSubtype == UAC2_CLOCK_SELECTOR)
                        append_ctl_name(kctl, " Clock Source");
                else if ((state->oterm.type & 0xff00) == 0x0100)
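
The restructured block above encodes a strict lookup order for the control name. A standalone sketch of that fallback chain; the three lookup stubs are hypothetical stand-ins for check_mapped_name(), snd_usb_copy_string_desc() and get_term_name():

    #include <stdio.h>
    #include <string.h>

    static int lookup_mapped(char *buf, size_t n)    { (void)buf; (void)n; return 0; }
    static int lookup_iselector(char *buf, size_t n) { return snprintf(buf, n, "Clock Selector"); }
    static int lookup_term(char *buf, size_t n)      { (void)buf; (void)n; return 0; }

    int main(void)
    {
            char name[32];
            int len;

            len = lookup_mapped(name, sizeof(name));        /* static table first */
            if (!len)
                    len = lookup_iselector(name, sizeof(name)); /* then iSelector */
            if (!len)
                    len = lookup_term(name, sizeof(name));  /* then terminal name */
            if (!len)
                    snprintf(name, sizeof(name), "USB");    /* last resort */

            printf("control name: %s\n", name);
            return 0;
    }
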
index 77eecaa4db1f32c9b7af87273c599181bf307443..a66ef5777887a78d7416e64c049c73b26477c7f7 100644 (file)
@@ -1166,10 +1166,11 @@ static bool is_marantz_denon_dac(unsigned int id)
 /* TEAC UD-501/UD-503/NT-503 USB DACs need a vendor cmd to switch
  * between PCM/DOP and native DSD mode
  */
-static bool is_teac_50X_dac(unsigned int id)
+static bool is_teac_dsd_dac(unsigned int id)
 {
        switch (id) {
        case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
+       case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
                return true;
        }
        return false;
@@ -1202,7 +1203,7 @@ int snd_usb_select_mode_quirk(struct snd_usb_substream *subs,
                        break;
                }
                mdelay(20);
-       } else if (is_teac_50X_dac(subs->stream->chip->usb_id)) {
+       } else if (is_teac_dsd_dac(subs->stream->chip->usb_id)) {
                /* Vendor mode switch cmd is required. */
                switch (fmt->altsetting) {
                case 3: /* DSD mode (DSD_U32) requested */
@@ -1392,7 +1393,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        }
 
        /* TEAC devices with USB DAC functionality */
-       if (is_teac_50X_dac(chip->usb_id)) {
+       if (is_teac_dsd_dac(chip->usb_id)) {
                if (fp->altsetting == 3)
                        return SNDRV_PCM_FMTBIT_DSD_U32_BE;
        }
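
The rename above reflects that the quirk now covers any TEAC-style DSD-capable DAC, keyed by a packed vendor/product ID. A standalone sketch of that matching; the USB_ID() packing shown (vendor in the high 16 bits) follows the usual sound/usb convention and should be read as an assumption, not a quote of the header:

    #include <stdbool.h>
    #include <stdio.h>

    #define USB_ID(vendor, product) (((unsigned int)(vendor) << 16) | (product))

    static bool is_teac_dsd_dac(unsigned int id)
    {
            switch (id) {
            case USB_ID(0x0644, 0x8043): /* TEAC UD-501/UD-503/NT-503 */
            case USB_ID(0x0644, 0x8044): /* Esoteric D-05X */
                    return true;
            }
            return false;
    }

    int main(void)
    {
            printf("%d\n", is_teac_dsd_dac(USB_ID(0x0644, 0x8044))); /* prints 1 */
            return 0;
    }
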
index cefe7c7cd4f6f29fabd90e33495d7a6d8ac6dbdd..0a8e37a519f258e72317f39a87ac9cc60ad47f6f 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
-#include <asm/ptrace.h>
+#include "ptrace.h"
 
 typedef user_pt_regs bpf_user_pt_regs_t;
 
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644 (file)
index 0000000..d17dd9e
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+       PERF_REG_S390_R0,
+       PERF_REG_S390_R1,
+       PERF_REG_S390_R2,
+       PERF_REG_S390_R3,
+       PERF_REG_S390_R4,
+       PERF_REG_S390_R5,
+       PERF_REG_S390_R6,
+       PERF_REG_S390_R7,
+       PERF_REG_S390_R8,
+       PERF_REG_S390_R9,
+       PERF_REG_S390_R10,
+       PERF_REG_S390_R11,
+       PERF_REG_S390_R12,
+       PERF_REG_S390_R13,
+       PERF_REG_S390_R14,
+       PERF_REG_S390_R15,
+       PERF_REG_S390_FP0,
+       PERF_REG_S390_FP1,
+       PERF_REG_S390_FP2,
+       PERF_REG_S390_FP3,
+       PERF_REG_S390_FP4,
+       PERF_REG_S390_FP5,
+       PERF_REG_S390_FP6,
+       PERF_REG_S390_FP7,
+       PERF_REG_S390_FP8,
+       PERF_REG_S390_FP9,
+       PERF_REG_S390_FP10,
+       PERF_REG_S390_FP11,
+       PERF_REG_S390_FP12,
+       PERF_REG_S390_FP13,
+       PERF_REG_S390_FP14,
+       PERF_REG_S390_FP15,
+       PERF_REG_S390_MASK,
+       PERF_REG_S390_PC,
+
+       PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
index e2450c8e88e6f13cfc0934b772880e80b6f18c37..a8c3a33dd185e213f68f97b8719c7ca20198783e 100644 (file)
@@ -523,21 +523,23 @@ static int do_show(int argc, char **argv)
                                break;
                        p_err("can't get next map: %s%s", strerror(errno),
                              errno == EINVAL ? " -- kernel too old?" : "");
-                       return -1;
+                       break;
                }
 
                fd = bpf_map_get_fd_by_id(id);
                if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
                        p_err("can't get map by id (%u): %s",
                              id, strerror(errno));
-                       return -1;
+                       break;
                }
 
                err = bpf_obj_get_info_by_fd(fd, &info, &len);
                if (err) {
                        p_err("can't get map info: %s", strerror(errno));
                        close(fd);
-                       return -1;
+                       break;
                }
 
                if (json_output)
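
The hunk above handles the race where a map disappears between fetching the next id and opening it: ENOENT now means "skip this id", and other errors end the walk instead of aborting the whole listing. A standalone sketch of that pattern; get_next_id()/get_fd_by_id() are hypothetical stand-ins for the bpf_map_get_next_id()/bpf_map_get_fd_by_id() pair:

    #include <errno.h>
    #include <stdio.h>

    static int get_next_id(unsigned int cur, unsigned int *next)
    {
            if (cur >= 3) { errno = ENOENT; return -1; }   /* end of list */
            *next = cur + 1;
            return 0;
    }

    static int get_fd_by_id(unsigned int id)
    {
            if (id == 2) { errno = ENOENT; return -1; }    /* vanished meanwhile */
            return (int)id + 100;                          /* fake fd */
    }

    int main(void)
    {
            unsigned int id = 0;

            while (!get_next_id(id, &id)) {
                    int fd = get_fd_by_id(id);

                    if (fd < 0) {
                            if (errno == ENOENT)
                                    continue;   /* raced with deletion: skip */
                            break;              /* real error: stop the walk */
                    }
                    printf("id %u -> fd %d\n", id, fd);
            }
            return 0;
    }

The same treat-ENOENT-as-skip logic is applied to the program listing in the next file.
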
index ad619b96c27664300df9a78bc28958c4d8662a4b..dded77345bfb05fef1fd50a3f3e7f54d43ae5cb7 100644 (file)
@@ -382,6 +382,8 @@ static int do_show(int argc, char **argv)
 
                fd = bpf_prog_get_fd_by_id(id);
                if (fd < 0) {
+                       if (errno == ENOENT)
+                               continue;
                        p_err("can't get prog by id (%u): %s",
                              id, strerror(errno));
                        err = -1;
index 217cf6f95c366037ccd2ff3cedb1d61de22c616a..a5684d0968b4fd087905e659c0ce80bd170434c2 100755 (executable)
@@ -478,7 +478,7 @@ class Provider(object):
     @staticmethod
     def is_field_wanted(fields_filter, field):
         """Indicate whether field is valid according to fields_filter."""
-        if not fields_filter or fields_filter == "help":
+        if not fields_filter:
             return True
         return re.match(fields_filter, field) is not None
 
@@ -549,8 +549,8 @@ class TracepointProvider(Provider):
 
     def update_fields(self, fields_filter):
         """Refresh fields, applying fields_filter"""
-        self._fields = [field for field in self.get_available_fields()
-                        if self.is_field_wanted(fields_filter, field)]
+        self.fields = [field for field in self.get_available_fields()
+                       if self.is_field_wanted(fields_filter, field)]
 
     @staticmethod
     def get_online_cpus():
@@ -950,7 +950,8 @@ class Tui(object):
             curses.nocbreak()
             curses.endwin()
 
-    def get_all_gnames(self):
+    @staticmethod
+    def get_all_gnames():
         """Returns a list of (pid, gname) tuples of all running guests"""
         res = []
         try:
@@ -963,7 +964,7 @@ class Tui(object):
             # perform a sanity check before calling the more expensive
             # function to possibly extract the guest name
             if ' -name ' in line[1]:
-                res.append((line[0], self.get_gname_from_pid(line[0])))
+                res.append((line[0], Tui.get_gname_from_pid(line[0])))
         child.stdout.close()
 
         return res
@@ -984,7 +985,8 @@ class Tui(object):
         except Exception:
             self.screen.addstr(row + 1, 2, 'Not available')
 
-    def get_pid_from_gname(self, gname):
+    @staticmethod
+    def get_pid_from_gname(gname):
         """Fuzzy function to convert guest name to QEMU process pid.
 
         Returns a list of potential pids, can be empty if no match found.
@@ -992,7 +994,7 @@ class Tui(object):
 
         """
         pids = []
-        for line in self.get_all_gnames():
+        for line in Tui.get_all_gnames():
             if gname == line[1]:
                 pids.append(int(line[0]))
 
@@ -1090,15 +1092,16 @@ class Tui(object):
             # sort by totals
             return (0, -stats[x][0])
         total = 0.
-        for val in stats.values():
-            total += val[0]
+        for key in stats.keys():
+            if key.find('(') == -1:
+                total += stats[key][0]
         if self._sorting == SORT_DEFAULT:
             sortkey = sortCurAvg
         else:
             sortkey = sortTotal
+        tavg = 0
         for key in sorted(stats.keys(), key=sortkey):
-
-            if row >= self.screen.getmaxyx()[0]:
+            if row >= self.screen.getmaxyx()[0] - 1:
                 break
             values = stats[key]
             if not values[0] and not values[1]:
@@ -1110,9 +1113,15 @@ class Tui(object):
                 self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' %
                                    (key, values[0], values[0] * 100 / total,
                                     cur))
+                if cur != '' and key.find('(') == -1:
+                    tavg += cur
             row += 1
         if row == 3:
             self.screen.addstr(4, 1, 'No matching events reported yet')
+        else:
+            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
+                               ('Total', total, tavg if tavg else ''),
+                               curses.A_BOLD)
         self.screen.refresh()
 
     def show_msg(self, text):
@@ -1358,7 +1367,7 @@ class Tui(object):
                 if char == 'x':
                     self.update_drilldown()
                     # prevents display of current values on next refresh
-                    self.stats.get()
+                    self.stats.get(self._display_guests)
             except KeyboardInterrupt:
                 break
             except curses.error:
@@ -1451,16 +1460,13 @@ Press any other key to refresh statistics immediately.
         try:
             pids = Tui.get_pid_from_gname(val)
         except:
-            raise optparse.OptionValueError('Error while searching for guest '
-                                            '"{}", use "-p" to specify a pid '
-                                            'instead'.format(val))
+            sys.exit('Error while searching for guest "{}". Use "-p" to '
+                     'specify a pid instead.'.format(val))
         if len(pids) == 0:
-            raise optparse.OptionValueError('No guest by the name "{}" '
-                                            'found'.format(val))
+            sys.exit('Error: No guest by the name "{}" found'.format(val))
         if len(pids) > 1:
-            raise optparse.OptionValueError('Multiple processes found (pids: '
-                                            '{}) - use "-p" to specify a pid '
-                                            'instead'.format(" ".join(pids)))
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'.format(" ".join(pids)))
         parser.values.pid = pids[0]
 
     optparser = optparse.OptionParser(description=description_text,
@@ -1518,7 +1524,16 @@ Press any other key to refresh statistics immediately.
                          help='restrict statistics to guest by name',
                          callback=cb_guest_to_pid,
                          )
-    (options, _) = optparser.parse_args(sys.argv)
+    options, unkn = optparser.parse_args(sys.argv)
+    if len(unkn) != 1:
+        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+    try:
+        # verify that we were passed a valid regex up front
+        re.compile(options.fields)
+    except re.error:
+        sys.exit('Error: "' + options.fields + '" is not a valid regular '
+                 'expression')
+
     return options
 
 
@@ -1564,16 +1579,13 @@ def main():
 
     stats = Stats(options)
 
-    if options.fields == "help":
-        event_list = "\n"
-        s = stats.get()
-        for key in s.keys():
-            if key.find('(') != -1:
-                key = key[0:key.find('(')]
-            if event_list.find('\n' + key + '\n') == -1:
-                event_list += key + '\n'
-        sys.stdout.write(event_list)
-        return ""
+    if options.fields == 'help':
+        stats.fields_filter = None
+        event_list = []
+        for key in stats.get().keys():
+            event_list.append(key.split('(', 1)[0])
+        sys.stdout.write('  ' + '\n  '.join(sorted(set(event_list))) + '\n')
+        sys.exit(0)
 
     if options.log:
         log(stats)
index e5cf836be8a1848bb82f39cfa3c7c75dcc67b4fa..b5b3810c9e945d7f3a39568840fbc5b73f84983b 100644 (file)
@@ -50,6 +50,8 @@ INTERACTIVE COMMANDS
 *s*::   set update interval
 
 *x*::  toggle reporting of stats for child trace events
+ ::    *Note*: The stats for the parents summarize the respective child trace
+               events.
 
 Press any other key to refresh statistics immediately.
 
@@ -86,7 +88,7 @@ OPTIONS
 
 -f<fields>::
 --fields=<fields>::
-       fields to display (regex)
+       fields to display (regex); use "-f help" to list available events
 
 -h::
 --help::
index ae0272f9a09184db54933fe079b9cf529bcf47d5..e6acc281dd3757dbd03ba32c69a89b76ed690800 100644 (file)
@@ -46,7 +46,7 @@ $(OBJTOOL_IN): fixdep FORCE
        @$(MAKE) $(build)=objtool
 
 $(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
-       @./sync-check.sh
+       @$(CONFIG_SHELL) ./sync-check.sh
        $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
 
 
index 8acfc47af70efde4c1a3bb3ad6aff809f0ad0308..540a209b78ab3cd6ae3b972c57b338dc0aa9b58d 100644 (file)
@@ -138,7 +138,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
                        *type = INSN_STACK;
                        op->src.type = OP_SRC_ADD;
                        op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
-                       op->dest.type = OP_SRC_REG;
+                       op->dest.type = OP_DEST_REG;
                        op->dest.reg = CFI_SP;
                }
                break;
index 4c6b5c9ef073b31c3a01dd0ebb85efbccb4bfee0..91e8e19ff5e06193adc55c455b4d97ad947da7ee 100644 (file)
@@ -44,6 +44,9 @@ int cmd_orc(int argc, const char **argv)
        const char *objname;
 
        argc--; argv++;
+       if (argc <= 0)
+               usage_with_options(orc_usage, check_options);
+
        if (!strncmp(argv[0], "gen", 3)) {
                argc = parse_options(argc, argv, check_options, orc_usage, 0);
                if (argc != 1)
@@ -52,7 +55,6 @@ int cmd_orc(int argc, const char **argv)
                objname = argv[0];
 
                return check(objname, no_fp, no_unreachable, true);
-
        }
 
        if (!strcmp(argv[0], "dump")) {
index 9b341584eb1b56b05761bea127ee6cf94eacc190..f40d46e24bcce3efe385aff13a4cb9274967d0d6 100644 (file)
@@ -427,6 +427,40 @@ static void add_ignores(struct objtool_file *file)
        }
 }
 
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines.  This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+       struct section *sec;
+       struct rela *rela;
+       struct instruction *insn;
+
+       sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+       if (!sec)
+               return 0;
+
+       list_for_each_entry(rela, &sec->rela_list, list) {
+               if (rela->sym->type != STT_SECTION) {
+                       WARN("unexpected relocation symbol type in %s", sec->name);
+                       return -1;
+               }
+
+               insn = find_insn(file, rela->sym->sec, rela->addend);
+               if (!insn) {
+                       WARN("bad .discard.nospec entry");
+                       return -1;
+               }
+
+               insn->ignore_alts = true;
+       }
+
+       return 0;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -456,6 +490,13 @@ static int add_jump_destinations(struct objtool_file *file)
                } else if (rela->sym->sec->idx) {
                        dest_sec = rela->sym->sec;
                        dest_off = rela->sym->sym.st_value + rela->addend + 4;
+               } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+                       /*
+                        * Retpoline jumps are really dynamic jumps in
+                        * disguise, so convert them accordingly.
+                        */
+                       insn->type = INSN_JUMP_DYNAMIC;
+                       continue;
                } else {
                        /* sibling call */
                        insn->jump_dest = 0;
@@ -502,11 +543,18 @@ static int add_call_destinations(struct objtool_file *file)
                        dest_off = insn->offset + insn->len + insn->immediate;
                        insn->call_dest = find_symbol_by_offset(insn->sec,
                                                                dest_off);
+                       /*
+                        * FIXME: Thanks to retpolines, it's now considered
+                        * normal for a function to call within itself.  So
+                        * disable this warning for now.
+                        */
+#if 0
                        if (!insn->call_dest) {
                                WARN_FUNC("can't find call dest symbol at offset 0x%lx",
                                          insn->sec, insn->offset, dest_off);
                                return -1;
                        }
+#endif
                } else if (rela->sym->type == STT_SECTION) {
                        insn->call_dest = find_symbol_by_offset(rela->sym->sec,
                                                                rela->addend+4);
@@ -671,12 +719,6 @@ static int add_special_section_alts(struct objtool_file *file)
                return ret;
 
        list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
-               alt = malloc(sizeof(*alt));
-               if (!alt) {
-                       WARN("malloc failed");
-                       ret = -1;
-                       goto out;
-               }
 
                orig_insn = find_insn(file, special_alt->orig_sec,
                                      special_alt->orig_off);
@@ -687,6 +729,10 @@ static int add_special_section_alts(struct objtool_file *file)
                        goto out;
                }
 
+               /* Ignore retpoline alternatives. */
+               if (orig_insn->ignore_alts)
+                       continue;
+
                new_insn = NULL;
                if (!special_alt->group || special_alt->new_len) {
                        new_insn = find_insn(file, special_alt->new_sec,
@@ -712,6 +758,13 @@ static int add_special_section_alts(struct objtool_file *file)
                                goto out;
                }
 
+               alt = malloc(sizeof(*alt));
+               if (!alt) {
+                       WARN("malloc failed");
+                       ret = -1;
+                       goto out;
+               }
+
                alt->insn = new_insn;
                list_add_tail(&alt->list, &orig_insn->alts);
 
@@ -1028,6 +1081,10 @@ static int decode_sections(struct objtool_file *file)
 
        add_ignores(file);
 
+       ret = add_nospec_ignores(file);
+       if (ret)
+               return ret;
+
        ret = add_jump_destinations(file);
        if (ret)
                return ret;
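
Note the reordering in add_special_section_alts() above: the malloc() moved below the new "ignore retpoline alternatives" continue, so skipped entries no longer leak an allocation. A minimal sketch of the allocate-only-when-needed pattern:

    #include <stdio.h>
    #include <stdlib.h>

    struct alt { int insn; };

    int main(void)
    {
            for (int i = 0; i < 4; i++) {
                    if (i & 1)              /* ignore_alts analogue */
                            continue;       /* nothing allocated yet: no leak */

                    struct alt *alt = malloc(sizeof(*alt)); /* allocate late */
                    if (!alt)
                            return 1;

                    alt->insn = i;
                    printf("kept alt for insn %d\n", alt->insn);
                    free(alt);
            }
            return 0;
    }
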
index 47d9ea70a83d9f9326fb7bd68431c88337b5ce31..dbadb304a410af0343809752bc6d2869668a9db0 100644 (file)
@@ -44,7 +44,7 @@ struct instruction {
        unsigned int len;
        unsigned char type;
        unsigned long immediate;
-       bool alt_group, visited, dead_end, ignore, hint, save, restore;
+       bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct list_head alts;
index e5ca31429c9bac29a9c66c4f8ddf97a94651690b..e61fe703197baa9b21814674c0250f6420e38c93 100644 (file)
@@ -165,6 +165,8 @@ int create_orc_sections(struct objtool_file *file)
 
        /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
        sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+       if (!sec)
+               return -1;
 
        ip_relasec = elf_create_rela_section(file->elf, sec);
        if (!ip_relasec)
index ed65e82f034efe0e76cef718fdb8a9bd07511edb..0294bfb6c5f87c990500ee57e931034091d0da48 100644 (file)
@@ -188,9 +188,7 @@ ifdef PYTHON_CONFIG
   PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
   PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
   PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
-  ifeq ($(CC_NO_CLANG), 1)
-    PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
-  endif
+  PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS))
   FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
 endif
 
@@ -576,14 +574,15 @@ ifndef NO_GTK2
   endif
 endif
 
-
 ifdef NO_LIBPERL
   CFLAGS += -DNO_LIBPERL
 else
   PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
   PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
   PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-  PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+  PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+  PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+  PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
   FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
 
   ifneq ($(feature-libperl), 1)
index d2df54a6bc5a24a588ca3153833623ca0ecaa1c3..bcfbaed78cc257f15d9820238bc1ef58256f1c6a 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <stdlib.h>
 #include <linux/types.h>
-#include <../../../../arch/s390/include/uapi/asm/perf_regs.h>
+#include <asm/perf_regs.h>
 
 void perf_regs_load(u64 *regs);
 
index 6db9d809fe9722a9e4eb0afb5140443490aa43e3..3e64f10b6d6678810715e517e7354679b86efd8e 100755 (executable)
@@ -21,6 +21,7 @@ arch/x86/include/asm/cpufeatures.h
 arch/arm/include/uapi/asm/perf_regs.h
 arch/arm64/include/uapi/asm/perf_regs.h
 arch/powerpc/include/uapi/asm/perf_regs.h
+arch/s390/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/kvm.h
 arch/x86/include/uapi/asm/kvm_perf.h
index cf36de7ea25587907d50db9c44e3dae5c3746c1a..0c6d1002b524eaf62ef62cc32763b041b2f33ba1 100644 (file)
@@ -384,13 +384,13 @@ jvmti_write_code(void *agent, char const *sym,
 }
 
 int
-jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
-                      jvmti_line_info_t *li, int nr_lines)
+jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
+                      jvmti_line_info_t *li,
+                      const char * const *file_names)
 {
        struct jr_code_debug_info rec;
-       size_t sret, len, size, flen;
+       size_t sret, len, size, flen = 0;
        uint64_t addr;
-       const char *fn = file;
        FILE *fp = agent;
        int i;
 
@@ -405,7 +405,9 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
                return -1;
        }
 
-       flen = strlen(file) + 1;
+       for (i = 0; i < nr_lines; ++i)
+               flen += strlen(file_names[i]) + 1;
 
        rec.p.id        = JIT_CODE_DEBUG_INFO;
        size            = sizeof(rec);
@@ -421,7 +423,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
         * file[]   : source file name
         */
        size += nr_lines * sizeof(struct debug_entry);
-       size += flen * nr_lines;
+       size += flen;
        rec.p.total_size = size;
 
        /*
@@ -452,7 +454,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, const char *file,
                if (sret != 1)
                        goto error;
 
-               sret = fwrite_unlocked(fn, flen, 1, fp);
+               sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
                if (sret != 1)
                        goto error;
        }
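
With per-line file names, the record size above is now the sum of each name's length (including its NUL terminator) rather than strlen(file) * nr_lines. A minimal sketch of that accounting, with illustrative names:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            const char *file_names[] = { "Main.java", "util/Helper.java" };
            const int nr_lines = 2;
            size_t flen = 0;

            for (int i = 0; i < nr_lines; i++)
                    flen += strlen(file_names[i]) + 1;      /* + trailing NUL */

            printf("payload bytes for names: %zu\n", flen); /* 10 + 17 = 27 */
            return 0;
    }
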
index fe32d8344a823f56de37f3ecea53af4c2f78329f..6ed82f6c06ddd0ed85ee79a5015707753d4f38b9 100644 (file)
@@ -14,6 +14,7 @@ typedef struct {
        unsigned long   pc;
        int             line_number;
        int             discrim; /* discriminator -- 0 for now */
+       jmethodID       methodID;
 } jvmti_line_info_t;
 
 void *jvmti_open(void);
@@ -22,11 +23,9 @@ int   jvmti_write_code(void *agent, char const *symbol_name,
                       uint64_t vma, void const *code,
                       const unsigned int code_size);
 
-int   jvmti_write_debug_info(void *agent,
-                            uint64_t code,
-                            const char *file,
+int   jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
                             jvmti_line_info_t *li,
-                            int nr_lines);
+                            const char * const *file_names);
 
 #if defined(__cplusplus)
 }
index c62c9fc9a525995c83ef8fc8bf2f790d599abadc..6add3e9826141346a34a93b9e0a970a22720e361 100644 (file)
@@ -47,6 +47,7 @@ do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
                        tab[lines].pc = (unsigned long)pc;
                        tab[lines].line_number = loc_tab[i].line_number;
                        tab[lines].discrim = 0; /* not yet used */
+                       tab[lines].methodID = m;
                        lines++;
                } else {
                        break;
@@ -125,6 +126,99 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
        return JVMTI_ERROR_NONE;
 }
 
+static void
+copy_class_filename(const char *class_sign, const char *file_name,
+                   char *result, size_t max_length)
+{
+       /*
+        * Assume the path name mirrors the class hierarchy; this is
+        * common practice with Java programs.
+        */
+       if (*class_sign == 'L') {
+               int j, i = 0;
+               char *p = strrchr(class_sign, '/');
+               if (p) {
+                       /* drop the 'L' prefix and copy up to the final '/' */
+                       for (i = 0; i < (p - class_sign); i++)
+                               result[i] = class_sign[i+1];
+               }
+               /*
+                * Append the file name; loops rather than string ops are
+                * used to avoid modifying class_sign, which is needed
+                * later for the symbol name.
+                */
+               for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
+                       result[i] = file_name[j];
+
+               result[i] = '\0';
+       } else {
+               /* fallback case: plain copy, always NUL-terminated */
+               snprintf(result, max_length, "%s", file_name);
+       }
+}
+
+static jvmtiError
+get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
+{
+       jvmtiError ret;
+       jclass decl_class;
+       char *file_name = NULL;
+       char *class_sign = NULL;
+       char fn[PATH_MAX];
+       size_t len;
+
+       ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetMethodDeclaringClass", ret);
+               return ret;
+       }
+
+       ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetSourceFileName", ret);
+               return ret;
+       }
+
+       ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
+       if (ret != JVMTI_ERROR_NONE) {
+               print_error(jvmti, "GetClassSignature", ret);
+               goto free_file_name_error;
+       }
+
+       copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+       len = strlen(fn);
+       *buffer = malloc(len + 1);
+       if (!*buffer) {
+               ret = JVMTI_ERROR_OUT_OF_MEMORY;
+               print_error(jvmti, "malloc", ret);
+               goto free_class_sign_error;
+       }
+       strcpy(*buffer, fn);
+       ret = JVMTI_ERROR_NONE;
+
+free_class_sign_error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+free_file_name_error:
+       (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+
+       return ret;
+}
+
+static jvmtiError
+fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
+                     const jvmti_line_info_t * line_tab,
+                     char ** file_names)
+{
+       int index;
+       jvmtiError ret;
+
+       for (index = 0; index < nr_lines; ++index) {
+               ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
+               if (ret != JVMTI_ERROR_NONE)
+                       return ret;
+       }
+
+       return JVMTI_ERROR_NONE;
+}
+
 static void JNICALL
 compiled_method_load_cb(jvmtiEnv *jvmti,
                        jmethodID method,
@@ -135,16 +229,18 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                        const void *compile_info)
 {
        jvmti_line_info_t *line_tab = NULL;
+       char **line_file_names = NULL;
        jclass decl_class;
        char *class_sign = NULL;
        char *func_name = NULL;
        char *func_sign = NULL;
-       char *file_name= NULL;
+       char *file_name = NULL;
        char fn[PATH_MAX];
        uint64_t addr = (uint64_t)(uintptr_t)code_addr;
        jvmtiError ret;
        int nr_lines = 0; /* in line_tab[] */
        size_t len;
+       int output_debug_info = 0;
 
        ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
                                                &decl_class);
@@ -158,6 +254,19 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                if (ret != JVMTI_ERROR_NONE) {
                        warnx("jvmti: cannot get line table for method");
                        nr_lines = 0;
+               } else if (nr_lines > 0) {
+                       line_file_names = calloc(nr_lines, sizeof(char *));
+                       if (!line_file_names) {
+                               warnx("jvmti: cannot allocate space for line table method names");
+                       } else {
+                               ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
+                               if (ret != JVMTI_ERROR_NONE) {
+                                       warnx("jvmti: fill_source_filenames failed");
+                               } else {
+                                       output_debug_info = 1;
+                               }
+                       }
                }
        }
 
@@ -181,33 +290,14 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
                goto error;
        }
 
-       /*
-        * Assume path name is class hierarchy, this is a common practice with Java programs
-        */
-       if (*class_sign == 'L') {
-               int j, i = 0;
-               char *p = strrchr(class_sign, '/');
-               if (p) {
-                       /* drop the 'L' prefix and copy up to the final '/' */
-                       for (i = 0; i < (p - class_sign); i++)
-                               fn[i] = class_sign[i+1];
-               }
-               /*
-                * append file name, we use loops and not string ops to avoid modifying
-                * class_sign which is used later for the symbol name
-                */
-               for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++)
-                       fn[i] = file_name[j];
-               fn[i] = '\0';
-       } else {
-               /* fallback case */
-               strcpy(fn, file_name);
-       }
+       copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+
        /*
         * write source line info record if we have it
         */
-       if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines))
-               warnx("jvmti: write_debug_info() failed");
+       if (output_debug_info &&
+           jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab,
+                                  (const char * const *)line_file_names))
+               warnx("jvmti: write_debug_info() failed");
 
        len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
        {
@@ -223,6 +313,13 @@ error:
        (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
        (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
        free(line_tab);
+       while (line_file_names && nr_lines > 0)
+               free(line_file_names[--nr_lines]);
+       free(line_file_names);
 }
 
 static void JNICALL
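
For reference, copy_class_filename() turns a JVMTI class signature such as "Ljava/lang/String;" plus a source file name into a package-relative path. A simplified standalone restatement (not the patch's code verbatim) showing the expected output:

    #include <stdio.h>
    #include <string.h>

    static void class_to_filename(const char *class_sign, const char *file_name,
                                  char *result, size_t max_length)
    {
            const char *p = strrchr(class_sign, '/');

            if (*class_sign == 'L' && p) {
                    size_t dir = (size_t)(p - class_sign);  /* includes the '/' */

                    /* drop the 'L', keep the package path, append the file */
                    snprintf(result, max_length, "%.*s%s",
                             (int)dir, class_sign + 1, file_name);
            } else {
                    snprintf(result, max_length, "%s", file_name);
            }
    }

    int main(void)
    {
            char fn[256];

            class_to_filename("Ljava/lang/String;", "String.java", fn, sizeof(fn));
            printf("%s\n", fn);     /* prints "java/lang/String.java" */
            return 0;
    }
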
index 792af7c3b74f98029ae32953a26e342157be5632..9316e648a880db61cad82c399c30218695d7d215 100644 (file)
@@ -11,7 +11,7 @@ ifneq ($(wildcard $(GENHDR)),)
 endif
 
 CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf
+LDLIBS += -lcap -lelf -lrt
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
        test_align test_verifier_log test_dev_cgroup
@@ -39,7 +39,7 @@ $(BPFOBJ): force
 CLANG ?= clang
 LLC   ?= llc
 
-PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
 
 # Let newer LLVM versions transparently probe the kernel for availability
 # of full BPF instruction set.
index 8591c89c0828a9ff780c63f4f83f3366f79e198f..471bbbdb94db75ece343f6a5191b7501dea1ee85 100644 (file)
@@ -474,27 +474,7 @@ static struct bpf_align_test tests[] = {
                .result = REJECT,
                .matches = {
                        {4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
-                       /* ptr & 0x40 == either 0 or 0x40 */
-                       {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
-                       /* ptr << 2 == unknown, (4n) */
-                       {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
-                       /* (4n) + 14 == (4n+2).  We blow our bounds, because
-                        * the add could overflow.
-                        */
-                       {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
-                       /* Checked s>=0 */
-                       {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-                       /* packet pointer + nonnegative (4n+2) */
-                       {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-                       {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-                       /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
-                        * We checked the bounds, but it might have been able
-                        * to overflow if the packet pointer started in the
-                        * upper half of the address space.
-                        * So we did not get a 'range' on R6, and the access
-                        * attempt will fail.
-                        */
-                       {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+                       /* R5 bitwise operator &= on pointer prohibited */
                }
        },
        {
index 69427531408dd22ef1887d473ab4bf6548e173b9..6761be18a91fccc2d4f8ad52b0f83fec293189ae 100644 (file)
@@ -351,7 +351,7 @@ static void test_bpf_obj_id(void)
                          info_len != sizeof(struct bpf_map_info) ||
                          strcmp((char *)map_infos[i].name, expected_map_name),
                          "get-map-info(fd)",
-                         "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+                         "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
                          err, errno,
                          map_infos[i].type, BPF_MAP_TYPE_ARRAY,
                          info_len, sizeof(struct bpf_map_info),
@@ -395,7 +395,7 @@ static void test_bpf_obj_id(void)
                          *(int *)prog_infos[i].map_ids != map_infos[i].id ||
                          strcmp((char *)prog_infos[i].name, expected_prog_name),
                          "get-prog-info(fd)",
-                         "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+                         "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
                          err, errno, i,
                          prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
                          info_len, sizeof(struct bpf_prog_info),
@@ -463,7 +463,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&prog_info, &prog_infos[i], info_len) ||
                      *(int *)prog_info.map_ids != saved_map_id,
                      "get-prog-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
                      err, errno, info_len, sizeof(struct bpf_prog_info),
                      memcmp(&prog_info, &prog_infos[i], info_len),
                      *(int *)prog_info.map_ids, saved_map_id);
@@ -509,7 +509,7 @@ static void test_bpf_obj_id(void)
                      memcmp(&map_info, &map_infos[i], info_len) ||
                      array_value != array_magic_value,
                      "check get-map-info(next_id->fd)",
-                     "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
+                     "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
                      err, errno, info_len, sizeof(struct bpf_map_info),
                      memcmp(&map_info, &map_infos[i], info_len),
                      array_value, array_magic_value);
index 3c64f30cf63cc2b6adb532a3b1f3201533193f7f..b51017404c62d0dc8198afdf035016f6e5e2fd0b 100644 (file)
@@ -422,9 +422,7 @@ static struct bpf_test tests[] = {
                        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr_unpriv = "R1 subtraction from stack pointer",
-               .result_unpriv = REJECT,
-               .errstr = "R1 invalid mem access",
+               .errstr = "R1 subtraction from stack pointer",
                .result = REJECT,
        },
        {
@@ -606,7 +604,6 @@ static struct bpf_test tests[] = {
                },
                .errstr = "misaligned stack access",
                .result = REJECT,
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "invalid map_fd for function call",
@@ -1797,7 +1794,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - bad alignment on reg",
@@ -1810,7 +1806,6 @@ static struct bpf_test tests[] = {
                },
                .result = REJECT,
                .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-               .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
        },
        {
                "PTR_TO_STACK store/load - out of bounds low",
@@ -1862,9 +1857,8 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .result = ACCEPT,
-               .result_unpriv = REJECT,
-               .errstr_unpriv = "R1 pointer += pointer",
+               .result = REJECT,
+               .errstr = "R1 pointer += pointer",
        },
        {
                "unpriv: neg pointer",
@@ -2592,7 +2586,8 @@ static struct bpf_test tests[] = {
                        BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                                    offsetof(struct __sk_buff, data)),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
-                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, len)),
                        BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
                        BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
                        BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@ -2899,7 +2894,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "invalid access to packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
@@ -3885,9 +3880,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3, 11 },
-               .errstr_unpriv = "R0 pointer += pointer",
-               .errstr = "R0 invalid mem access 'inv'",
-               .result_unpriv = REJECT,
+               .errstr = "R0 pointer += pointer",
                .result = REJECT,
                .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
        },
@@ -3928,7 +3921,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3949,7 +3942,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -3970,7 +3963,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R4 invalid mem access",
+               .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
@@ -5195,10 +5188,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 bitwise operator &= on pointer",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 bitwise operator &= on pointer",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 2",
@@ -5214,10 +5205,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 32-bit pointer arithmetic prohibited",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 3",
@@ -5233,10 +5222,8 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map2 = { 3 },
-               .errstr_unpriv = "R0 pointer arithmetic with /= operator",
-               .errstr = "invalid mem access 'inv'",
+               .errstr = "R0 pointer arithmetic with /= operator",
                .result = REJECT,
-               .result_unpriv = REJECT,
        },
        {
                "map element value illegal alu op, 4",
@@ -6019,8 +6006,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map_in_map = { 3 },
-               .errstr = "R1 type=inv expected=map_ptr",
-               .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+               .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
                .result = REJECT,
        },
        {
@@ -6116,6 +6102,30 @@ static struct bpf_test tests[] = {
                },
                .result = ACCEPT,
        },
+       {
+               "ld_abs: tests on r6 and skb data reload helper",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+                       BPF_MOV64_IMM(BPF_REG_6, 0),
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       BPF_MOV64_IMM(BPF_REG_3, 2),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_skb_vlan_push),
+                       BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+                       BPF_LD_ABS(BPF_B, 0),
+                       BPF_LD_ABS(BPF_H, 0),
+                       BPF_LD_ABS(BPF_W, 0),
+                       BPF_MOV64_IMM(BPF_REG_0, 42),
+                       BPF_EXIT_INSN(),
+               },
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+               .result = ACCEPT,
+       },
        {
                "ld_ind: check calling conv, r1",
                .insns = {
@@ -6300,7 +6310,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6324,7 +6334,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6350,7 +6360,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6375,7 +6385,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R8 invalid mem access 'inv'",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6423,7 +6433,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6494,7 +6504,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6545,7 +6555,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6572,7 +6582,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6598,7 +6608,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6627,7 +6637,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6657,7 +6667,7 @@ static struct bpf_test tests[] = {
                        BPF_JMP_IMM(BPF_JA, 0, 0, -7),
                },
                .fixup_map1 = { 4 },
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
        },
        {
@@ -6685,8 +6695,7 @@ static struct bpf_test tests[] = {
                        BPF_EXIT_INSN(),
                },
                .fixup_map1 = { 3 },
-               .errstr_unpriv = "R0 pointer comparison prohibited",
-               .errstr = "R0 min value is negative",
+               .errstr = "unbounded min value",
                .result = REJECT,
                .result_unpriv = REJECT,
        },
@@ -6741,6 +6750,462 @@ static struct bpf_test tests[] = {
                .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
                .result = REJECT,
        },
+       {
+               "bounds check based on zero-extended MOV",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0x0000'0000'ffff'ffff */
+                       BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
+       {
+               "bounds check based on sign-extended MOV. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 4294967295",
+               .result = REJECT
+       },
+       {
+               "bounds check based on sign-extended MOV. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       /* r2 = 0xffff'ffff'ffff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+                       /* r2 = 0xfff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+                       /* r0 = <oob pointer> */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+                       /* access to OOB pointer */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 min value is outside of the array range",
+               .result = REJECT
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test1",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value_size=8 off=1073741825",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
+       {
+               "bounds check based on reg_off + var_off + insn_off. test2",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+                                   offsetof(struct __sk_buff, mark)),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 4 },
+               .errstr = "value 1073741823",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
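+       /* Same shape, but r6's bound is now (1 << 30) - 1 = 1073741823,
+        * which already exceeds what may be added to a map value pointer
+        * and is the value quoted in the error string.
+        */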
+       {
+               "bounds check after truncation of non-boundary-crossing range",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_MOV64_IMM(BPF_REG_2, 1),
+                       /* r2 = 0x10'0000'0000 */
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+                       /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
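+       /* Accepted because the range never crosses a 2^32 boundary in its
+        * low 32 bits ([0x7fff'ffff, 0x8000'00fe]), so the 32-bit SUB
+        * still yields an exactly tracked [0x00, 0xff] and the final
+        * shift proves r1 == 0.
+        */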
+       {
+               "bounds check after truncation of boundary-crossing range (1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
+       {
+               "bounds check after truncation of boundary-crossing range (2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0xffff'ff80, 0xffff'ffff] or
+                        *      [0x0000'0000, 0x0000'007f]
+                        * difference to previous test: truncation via MOV32
+                        * instead of ALU32.
+                        */
+                       BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = [0x00, 0xff] or
+                        *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+                       /* r1 = 0 or
+                        *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+                        */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* no-op or OOB pointer computation */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               /* not actually fully unbounded, but the bound is very high */
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
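+       /* In both variants above the low 32 bits wrap around 2^32, so
+        * truncation splits the range into two disjoint intervals and the
+        * verifier has to give up on precise bounds; ALU32 and MOV32 are
+        * exercised separately because they truncate via different paths.
+        */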
+       {
+               "bounds check after wrapping 32-bit addition",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+                       /* r1 = 0x7fff'ffff */
+                       BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0xffff'fffe */
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       /* r1 = 0 */
+                       BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+                       /* no-op */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* access at offset 0 */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT
+       },
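+       /* 0x7fff'ffff + 0x7fff'ffff = 0xffff'fffe, and the 32-bit add of
+        * 2 wraps that to exactly 0: a constant, so the access is provably
+        * at offset 0.
+        */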
+       {
+               "bounds check after shift with oversized count operand",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_MOV64_IMM(BPF_REG_2, 32),
+                       BPF_MOV64_IMM(BPF_REG_1, 1),
+                       /* r1 = (u32)1 << (u32)32 = ? */
+                       BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+                       /* r1 = [0x0000, 0xffff] */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 max value is outside of the array range",
+               .result = REJECT
+       },
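+       /* A 32-bit shift by 32 is undefined at the ISA level (x86 masks
+        * the count to 5 bits), so the verifier must treat r1 as unknown
+        * afterwards; the AND only narrows it to [0x0000, 0xffff], still
+        * well outside the 8-byte value.
+        */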
+       {
+               "bounds check after right shift of maybe-negative number",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       /* r1 = [0x00, 0xff] */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       /* r1 = [-0x01, 0xfe] */
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+                       /* r1 = 0 or 0xff'ffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* r1 = 0 or 0xffff'ffff'ffff */
+                       BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+                       /* computes unknown pointer, potentially OOB */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       /* potentially OOB access */
+                       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+                       /* exit */
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "R0 unbounded memory access",
+               .result = REJECT
+       },
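+       /* After the SUB, r1 may be -1; the logical right shifts turn that
+        * into values as large as 0xffff'ffff'ffff rather than 0, so r1
+        * stays effectively unbounded with respect to the 8-byte value.
+        */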
+       {
+               "bounds check map access with off+size signed 32bit overflow. test1",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 2147483646",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test2",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset 1073741822",
+               .result = REJECT
+       },
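+       /* Each 0x1fff'ffff add is individually below the limit, but the
+        * accumulated fixed offset reaches 2 * 0x1fff'ffff = 1073741822
+        * on the second add, which is what the error string reports.
+        */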
+       {
+               "bounds check map access with off+size signed 32bit overflow. test3",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "pointer offset -1073741822",
+               .result = REJECT
+       },
+       {
+               "bounds check map access with off+size signed 32bit overflow. test4",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+                       BPF_EXIT_INSN(),
+                       BPF_MOV64_IMM(BPF_REG_1, 1000000),
+                       BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .errstr = "map_value pointer and 1000000000000",
+               .result = REJECT
+       },
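+       /* 1000000 * 1000000 = 10^12 = 1000000000000, far beyond any
+        * 32-bit off+size computation, hence the value in the error
+        * string.
+        */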
+       {
+               "pointer/scalar confusion in state equality check (way 1)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_JMP_A(1),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
+       {
+               "pointer/scalar confusion in state equality check (way 2)",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+                       BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+                       BPF_JMP_A(1),
+                       BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 3 },
+               .result = ACCEPT,
+               .result_unpriv = REJECT,
+               .errstr_unpriv = "R0 leaks addr as return value"
+       },
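+       /* In both variants r0 reaches the exit as a scalar on one path
+        * and as a stack pointer on the other; state pruning must not
+        * treat those register states as equivalent, and an unprivileged
+        * program must not be able to return the pointer (hence the
+        * _unpriv expectations).
+        */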
        {
                "variable-offset ctx access",
                .insns = {
@@ -6782,6 +7247,71 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_LWT_IN,
        },
+       {
+               "indirect variable-offset stack access",
+               .insns = {
+                       /* Fill the top 8 bytes of the stack */
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       /* Get an unknown value */
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+                       /* Make it small and 4-byte aligned */
+                       BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+                       BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+                       /* add it to fp.  We now have either fp-4 or fp-8, but
+                        * we don't know which
+                        */
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+                       /* dereference it indirectly */
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+                                    BPF_FUNC_map_lookup_elem),
+                       BPF_MOV64_IMM(BPF_REG_0, 0),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map1 = { 5 },
+               .errstr = "variable stack read R2",
+               .result = REJECT,
+               .prog_type = BPF_PROG_TYPE_LWT_IN,
+       },
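+       /* The verifier does not track the contents of stack slots under a
+        * variable offset, so it cannot prove the key bytes are
+        * initialized and rejects the read.
+        */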
+       {
+               "direct stack access with 32-bit wraparound. test1",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 2147483647",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test2",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer and 1073741823",
+               .result = REJECT
+       },
+       {
+               "direct stack access with 32-bit wraparound. test3",
+               .insns = {
+                       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+                       BPF_MOV32_IMM(BPF_REG_0, 0),
+                       BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_EXIT_INSN()
+               },
+               .errstr = "fp pointer offset 1073741822",
+               .result = REJECT
+       },
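+       /* test1 and test2 are caught because the constant being added is
+        * itself over the limit; test3 adds two in-range constants whose
+        * accumulated offset of 1073741822 then trips the total-offset
+        * check instead.
+        */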
        {
                "liveness pruning and write screening",
                .insns = {
@@ -7103,6 +7633,19 @@ static struct bpf_test tests[] = {
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        },
+       {
+               "pkt_end - pkt_start is allowed",
+               .insns = {
+                       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data_end)),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+                                   offsetof(struct __sk_buff, data)),
+                       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+                       BPF_EXIT_INSN(),
+               },
+               .result = ACCEPT,
+               .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       },
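+       /* Subtracting two packet pointers yields a plain scalar (the
+        * packet length), which is fine; only arithmetic that tries to
+        * produce a new pkt_end pointer, as in the tests below, must be
+        * rejected.
+        */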
        {
                "XDP pkt read, pkt_end mangling, bad access 1",
                .insns = {
@@ -7118,7 +7661,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
@@ -7137,7 +7680,7 @@ static struct bpf_test tests[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "R1 offset is outside of the packet",
+               .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
                .result = REJECT,
                .prog_type = BPF_PROG_TYPE_XDP,
        },
index e57b4ac40e72e0502dff75ea1d80c543280428eb..7177bea1fdfa62a1aa4e424d4dab665d8a9b7aaf 100644 (file)
@@ -1,3 +1,4 @@
 CONFIG_USER_NS=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
index 939a337128dbf3fb08277995de98437b10b037b8..5d4f10ac2af226b58c7ece2f965749b72899ddc6 100644 (file)
@@ -7,7 +7,7 @@ include ../lib.mk
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
                        check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
-                       protection_keys test_vdso
+                       protection_keys test_vdso test_vsyscall
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
                        vdso_restorer
index 66e5ce5b91f008d5dffc7848d0e99d730ad589b5..1aef72df20a112a30c17f2e567ca8b972a550104 100644 (file)
@@ -122,8 +122,7 @@ static void check_valid_segment(uint16_t index, int ldt,
         * NB: Different Linux versions do different things with the
         * accessed bit in set_thread_area().
         */
-       if (ar != expected_ar &&
-           (ldt || ar != (expected_ar | AR_ACCESSED))) {
+       if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
                printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
                       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
                nerrs++;
@@ -627,13 +626,10 @@ static void do_multicpu_tests(void)
 static int finish_exec_test(void)
 {
        /*
-        * In a sensible world, this would be check_invalid_segment(0, 1);
-        * For better or for worse, though, the LDT is inherited across exec.
-        * We can probably change this safely, but for now we test it.
+        * Older kernel versions did inherit the LDT on exec(), which is
+        * wrong because exec() starts from a clean state.
         */
-       check_valid_segment(0, 1,
-                           AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB,
-                           42, true);
+       check_invalid_segment(0, 1);
 
        return nerrs ? 1 : 0;
 }
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644 (file)
index 0000000..7a744fa
--- /dev/null
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
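+/*
+ * The vsyscall page exists only on x86_64, at a fixed address; elsewhere
+ * VSYS() evaluates to 0 so the function pointers below stay NULL and the
+ * corresponding tests are skipped.
+ */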
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction sa;
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_sigaction = handler;
+       sa.sa_flags = SA_SIGINFO | flags;
+       sigemptyset(&sa.sa_mask);
+       if (sigaction(sig, &sa, 0))
+               err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+       void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso)
+               vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+       if (!vdso) {
+               printf("[WARN]\tfailed to find vDSO\n");
+               return;
+       }
+
+       vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+       if (!vdso_gtod)
+               printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+       vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+       if (!vdso_gettime)
+               printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+       vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+       if (!vdso_time)
+               printf("[WARN]\tfailed to find time in vDSO\n");
+
+       vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+       if (!vdso_getcpu) {
+               /* getcpu() was never wired up in the 32-bit vDSO. */
+               printf("[%s]\tfailed to find getcpu in vDSO\n",
+                      sizeof(long) == 8 ? "WARN" : "NOTE");
+       }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+       int nerrs = 0;
+       FILE *maps;
+       char line[128];
+       bool found = false;
+
+       maps = fopen("/proc/self/maps", "r");
+       if (!maps) {
+               printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+               should_read_vsyscall = true;
+               return 0;
+       }
+
+       while (fgets(line, sizeof(line), maps)) {
+               char r, x;
+               void *start, *end;
+               char name[128];
+               if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+                          &start, &end, &r, &x, name) != 5)
+                       continue;
+
+               if (strcmp(name, "[vsyscall]"))
+                       continue;
+
+               printf("\tvsyscall map: %s", line);
+
+               if (start != (void *)0xffffffffff600000 ||
+                   end != (void *)0xffffffffff601000) {
+                       printf("[FAIL]\taddress range is nonsense\n");
+                       nerrs++;
+               }
+
+               printf("\tvsyscall permissions are %c-%c\n", r, x);
+               should_read_vsyscall = (r == 'r');
+               if (x != 'x') {
+                       vgtod = NULL;
+                       vtime = NULL;
+                       vgetcpu = NULL;
+               }
+
+               found = true;
+               break;
+       }
+
+       fclose(maps);
+
+       if (!found) {
+               printf("\tno vsyscall map in /proc/self/maps\n");
+               should_read_vsyscall = false;
+               vgtod = NULL;
+               vtime = NULL;
+               vgetcpu = NULL;
+       }
+
+       return nerrs;
+#else
+       return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+       return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+       return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+       return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned *cpu, unsigned *node,
+                             void *cache)
+{
+       return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+       siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+       return (double)(a->tv_sec - b->tv_sec) +
+               (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
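+/*
+ * Sandwich check: a timeval obtained via the vDSO or the vsyscall page
+ * must be ordered between two surrounding syscall readings, i.e. the two
+ * deltas computed below must both be non-negative.
+ */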
+static int check_gtod(const struct timeval *tv_sys1,
+                     const struct timeval *tv_sys2,
+                     const struct timezone *tz_sys,
+                     const char *which,
+                     const struct timeval *tv_other,
+                     const struct timezone *tz_other)
+{
+       int nerrs = 0;
+       double d1, d2;
+
+       if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest ||
+                        tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+               printf("[FAIL]\t%s tz mismatch\n", which);
+               nerrs++;
+       }
+
+       d1 = tv_diff(tv_other, tv_sys1);
+       d2 = tv_diff(tv_sys2, tv_other);
+       printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+       if (d1 < 0 || d2 < 0) {
+               printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+               nerrs++;
+       } else {
+               printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+       }
+
+       return nerrs;
+}
+
+static int test_gtod(void)
+{
+       struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+       struct timezone tz_sys, tz_vdso, tz_vsys;
+       long ret_vdso = -1;
+       long ret_vsys = -1;
+       int nerrs = 0;
+
+       printf("[RUN]\ttest gettimeofday()\n");
+
+       if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+               err(1, "syscall gettimeofday");
+       if (vdso_gtod)
+               ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+       if (vgtod)
+               ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+       if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+               err(1, "syscall gettimeofday");
+
+       if (vdso_gtod) {
+               if (ret_vdso == 0) {
+                       nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+               } else {
+                       printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+                       nerrs++;
+               }
+       }
+
+       if (vgtod) {
+               if (ret_vsys == 0) {
+                       nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+               } else {
+                       printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+                       nerrs++;
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_time(void)
+{
+       int nerrs = 0;
+
+       printf("[RUN]\ttest time()\n");
+       long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+       long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+       t_sys1 = sys_time(&t2_sys1);
+       if (vdso_time)
+               t_vdso = vdso_time(&t2_vdso);
+       if (vtime)
+               t_vsys = vtime(&t2_vsys);
+       t_sys2 = sys_time(&t2_sys2);
+       if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+               printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+               nerrs++;
+               return nerrs;
+       }
+
+       if (vdso_time) {
+               if (t_vdso < 0 || t_vdso != t2_vdso) {
+                       printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+                       nerrs++;
+               } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+                       printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+                       nerrs++;
+               } else {
+                       printf("[OK]\tvDSO time() is okay\n");
+               }
+       }
+
+       if (vtime) {
+               if (t_vsys < 0 || t_vsys != t2_vsys) {
+                       printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+                       nerrs++;
+               } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+                       printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+                       nerrs++;
+               } else {
+                       printf("[OK]\tvsyscall time() is okay\n");
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+       int nerrs = 0;
+       long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+       printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+       cpu_set_t cpuset;
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+       if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+               printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+               return nerrs;
+       }
+
+       unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+       unsigned node = 0;
+       bool have_node = false;
+       ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+       if (vdso_getcpu)
+               ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+       if (vgetcpu)
+               ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+       if (ret_sys == 0) {
+               if (cpu_sys != cpu) {
+                       printf("[FAIL]\tsyscall reported CPU %u but should be %d\n", cpu_sys, cpu);
+                       nerrs++;
+               }
+
+               have_node = true;
+               node = node_sys;
+       }
+
+       if (vdso_getcpu) {
+               if (ret_vdso) {
+                       printf("[FAIL]\tvDSO getcpu() failed\n");
+                       nerrs++;
+               } else {
+                       if (!have_node) {
+                               have_node = true;
+                               node = node_vdso;
+                       }
+
+                       if (cpu_vdso != cpu) {
+                               printf("[FAIL]\tvDSO reported CPU %u but should be %d\n", cpu_vdso, cpu);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvDSO reported correct CPU\n");
+                       }
+
+                       if (node_vdso != node) {
+                               printf("[FAIL]\tvDSO reported node %u but should be %u\n", node_vdso, node);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvDSO reported correct node\n");
+                       }
+               }
+       }
+
+       if (vgetcpu) {
+               if (ret_vsys) {
+                       printf("[FAIL]\tvsyscall getcpu() failed\n");
+                       nerrs++;
+               } else {
+                       if (!have_node) {
+                               have_node = true;
+                               node = node_vsys;
+                       }
+
+                       if (cpu_vsys != cpu) {
+                               printf("[FAIL]\tvsyscall reported CPU %u but should be %d\n", cpu_vsys, cpu);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvsyscall reported correct CPU\n");
+                       }
+
+                       if (node_vsys != node) {
+                               printf("[FAIL]\tvsyscall reported node %u but should be %u\n", node_vsys, node);
+                               nerrs++;
+                       } else {
+                               printf("[OK]\tvsyscall reported correct node\n");
+                       }
+               }
+       }
+
+       return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+       printf("[RUN]\tChecking read access to the vsyscall page\n");
+       bool can_read;
+       if (sigsetjmp(jmpbuf, 1) == 0) {
+               *(volatile int *)0xffffffffff600000;
+               can_read = true;
+       } else {
+               can_read = false;
+       }
+
+       if (can_read && !should_read_vsyscall) {
+               printf("[FAIL]\tWe have read access, but we shouldn't\n");
+               return 1;
+       } else if (!can_read && should_read_vsyscall) {
+               printf("[FAIL]\tWe don't have read access, but we should\n");
+               return 1;
+       } else {
+               printf("[OK]\tgot expected result\n");
+       }
+#endif
+
+       return 0;
+}
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+       unsigned long eflags;
+       asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+       return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+       asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t *)ctx_void;
+       unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
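+       /* Count only traps whose RIP falls inside the 4 KiB vsyscall page. */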
+       if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+               num_vsyscall_traps++;
+}
+
+static int test_native_vsyscall(void)
+{
+       time_t tmp;
+       bool is_native;
+
+       if (!vtime)
+               return 0;
+
+       printf("[RUN]\tchecking for native vsyscall\n");
+       sethandler(SIGTRAP, sigtrap, 0);
+       set_eflags(get_eflags() | X86_EFLAGS_TF);
+       vtime(&tmp);
+       set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+       /*
+        * If vsyscalls are emulated, we expect a single trap in the
+        * vsyscall page -- the call instruction will trap with RIP
+        * pointing to the entry point before emulation takes over.
+        * In native mode, we expect two traps, since whatever code
+        * the vsyscall page contains will be more than just a ret
+        * instruction.
+        */
+       is_native = (num_vsyscall_traps > 1);
+
+       printf("\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+              (is_native ? "native" : "emulated"),
+              (int)num_vsyscall_traps);
+
+       return 0;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+       int nerrs = 0;
+
+       init_vdso();
+       nerrs += init_vsys();
+
+       nerrs += test_gtod();
+       nerrs += test_time();
+       nerrs += test_getcpu(0);
+       nerrs += test_getcpu(1);
+
+       sethandler(SIGSEGV, sigsegv, 0);
+       nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+       nerrs += test_native_vsyscall();
+#endif
+
+       return nerrs ? 1 : 0;
+}
index 2b3d6d2350158b73a2597dc1a78ac007b6a3563c..3d7b42e7729941b1ca8f35fb7f6d10d4151a1a2f 100644 (file)
@@ -30,6 +30,7 @@ int modify_match_busid(char *busid, int add)
        char command[SYSFS_BUS_ID_SIZE + 4];
        char match_busid_attr_path[SYSFS_PATH_MAX];
        int rc;
+       int cmd_size;
 
        snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
                 "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
@@ -37,12 +38,14 @@ int modify_match_busid(char *busid, int add)
                 attr_name);
 
        if (add)
-               snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s", busid);
+               cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+                                   busid);
        else
-               snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s", busid);
+               cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+                                   busid);
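+       /* snprintf() returns the number of bytes written (the busid always
+        * fits the buffer sized above), so only the meaningful part of the
+        * buffer reaches the sysfs attribute below.
+        */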
 
        rc = write_sysfs_attribute(match_busid_attr_path, command,
-                                  sizeof(command));
+                                  cmd_size);
        if (rc < 0) {
                dbg("failed to write match_busid: %s", strerror(errno));
                return -1;
index f9555b1e7f158f5203c1aaba47002424d3279203..cc29a814832837f5fb237dfdf74845a284e04367 100644 (file)
@@ -92,16 +92,23 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
        struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
        struct arch_timer_context *vtimer;
+       u32 cnt_ctl;
 
-       if (!vcpu) {
-               pr_warn_once("Spurious arch timer IRQ on non-VCPU thread\n");
-               return IRQ_NONE;
-       }
-       vtimer = vcpu_vtimer(vcpu);
+       /*
+        * We may see a timer interrupt after vcpu_put() has been called,
+        * which sets the CPU's vcpu pointer to NULL, because even though
+        * the timer has been disabled in vtimer_save_state(), the hardware
+        * interrupt signal may not have been retired from the interrupt
+        * controller yet.
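+        * The interrupt is still ours, just late, so report it as handled;
+        * returning IRQ_NONE would let the core count it as spurious.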
+        */
+       if (!vcpu)
+               return IRQ_HANDLED;
 
+       vtimer = vcpu_vtimer(vcpu);
        if (!vtimer->irq.level) {
-               vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-               if (kvm_timer_irq_can_fire(vtimer))
+               cnt_ctl = read_sysreg_el0(cntv_ctl);
+               cnt_ctl &= ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT |
+                          ARCH_TIMER_CTRL_IT_MASK;
+               if (cnt_ctl == (ARCH_TIMER_CTRL_ENABLE | ARCH_TIMER_CTRL_IT_STAT))
                        kvm_timer_update_irq(vcpu, true, vtimer);
        }
 
@@ -355,6 +362,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 
        /* Disable the virtual timer */
        write_sysreg_el0(0, cntv_ctl);
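+       /* An ISB ensures the write to CNTV_CTL has taken effect before
+        * execution continues.
+        */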
+       isb();
 
        vtimer->loaded = false;
 out:
@@ -720,7 +728,7 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
        return 0;
 }
 
-int kvm_timer_hyp_init(void)
+int kvm_timer_hyp_init(bool has_gic)
 {
        struct arch_timer_kvm_info *info;
        int err;
@@ -756,10 +764,13 @@ int kvm_timer_hyp_init(void)
                return err;
        }
 
-       err = irq_set_vcpu_affinity(host_vtimer_irq, kvm_get_running_vcpus());
-       if (err) {
-               kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
-               goto out_free_irq;
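+       /*
+        * Without a usable GIC the timer interrupt cannot be forwarded
+        * to a vcpu, so only set up the vcpu affinity when one is present.
+        */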
+       if (has_gic) {
+               err = irq_set_vcpu_affinity(host_vtimer_irq,
+                                           kvm_get_running_vcpus());
+               if (err) {
+                       kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+                       goto out_free_irq;
+               }
        }
 
        kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
@@ -835,10 +846,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 no_vgic:
        preempt_disable();
        timer->enabled = 1;
-       if (!irqchip_in_kernel(vcpu->kvm))
-               kvm_timer_vcpu_load_user(vcpu);
-       else
-               kvm_timer_vcpu_load_vgic(vcpu);
+       kvm_timer_vcpu_load(vcpu);
        preempt_enable();
 
        return 0;
index 6b60c98a6e2294c773eb20ea4794445a667415ea..2e43f9d42bd5db2a07438bb98f5e029c6246adb4 100644 (file)
@@ -1326,7 +1326,7 @@ static int init_subsystems(void)
        /*
         * Init HYP architected timer support
         */
-       err = kvm_timer_hyp_init();
+       err = kvm_timer_hyp_init(vgic_present);
        if (err)
                goto out;
 
index b6e715fd3c90af8c74408b72652f9974a3fb894d..dac7ceb1a677746cadb086a2cf8a07d8e560373c 100644 (file)
@@ -112,7 +112,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
                }
 
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
-                              data);
+                              &data);
                data = vcpu_data_host_to_guest(vcpu, data, len);
                vcpu_set_reg(vcpu, vcpu->arch.mmio_decode.rt, data);
        }
@@ -182,14 +182,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt),
                                               len);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data);
                kvm_mmio_write_buf(data_buf, len, data);
 
                ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                       data_buf);
        } else {
                trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
-                              fault_ipa, 0);
+                              fault_ipa, NULL);
 
                ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
                                      data_buf);
index b36945d49986dd5c0f097f16837d72d81f655308..b4b69c2d10120237e12bc6524243071bf645f1d9 100644 (file)
@@ -509,8 +509,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
  */
 void free_hyp_pgds(void)
 {
-       unsigned long addr;
-
        mutex_lock(&kvm_hyp_pgd_mutex);
 
        if (boot_hyp_pgd) {
@@ -521,10 +519,10 @@ void free_hyp_pgds(void)
 
        if (hyp_pgd) {
                unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
-               for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
-               for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
-                       unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
+                               (uintptr_t)high_memory - PAGE_OFFSET);
+               unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
+                               VMALLOC_END - VMALLOC_START);
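+               /* The two ranges above are the kernel linear map up to
+                * high_memory and the whole vmalloc area.
+                */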
 
                free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
                hyp_pgd = NULL;