Merge tag 'char-misc-4.15-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregk...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Dec 2017 18:52:51 +0000 (10:52 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 31 Dec 2017 18:52:51 +0000 (10:52 -0800)
Pull char/misc fixes from Greg KH:
 "Here are six small fixes of some of the char/misc drivers that have
  been sent in to resolve reported issues.

  Nothing major, a binder use-after-free fix, some thunderbolt bugfixes,
  a hyper-v bugfix, and an nvmem driver fix. All of these have been in
  linux-next with no reported issues for a while"

* tag 'char-misc-4.15-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc:
  nvmem: meson-mx-efuse: fix reading from an offset other than 0
  binder: fix proc->files use-after-free
  vmbus: unregister device_obj->channels_kset
  thunderbolt: Mask ring interrupt properly when polling starts
  MAINTAINERS: Add thunderbolt.rst to the Thunderbolt driver entry
  thunderbolt: Make pathname to force_power shorter

743 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/arm64/silicon-errata.txt
Documentation/cgroup-v2.txt
Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
Documentation/devicetree/bindings/sound/da7218.txt
Documentation/devicetree/bindings/sound/da7219.txt
Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
Documentation/filesystems/overlayfs.txt
Documentation/locking/crossrelease.txt [deleted file]
Documentation/vm/zswap.txt
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/arm/boot/dts/vf610-zii-dev-rev-c.dts
arch/arm/lib/csumpartialcopyuser.S
arch/arm64/Kconfig
arch/arm64/include/asm/assembler.h
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/cpu-reset.S
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/efi-entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/relocate_kernel.S
arch/arm64/kvm/hyp-init.S
arch/arm64/kvm/hyp/debug-sr.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/parisc/boot/compressed/misc.c
arch/parisc/include/asm/thread_info.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/hpmc.S
arch/parisc/kernel/unwind.c
arch/parisc/lib/delay.c
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/kernel/process.c
arch/powerpc/kvm/book3s_xive.c
arch/powerpc/net/bpf_jit_comp64.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/imc-pmu.c
arch/riscv/include/asm/barrier.h
arch/riscv/kernel/setup.c
arch/riscv/kernel/sys_riscv.c
arch/s390/include/asm/pgtable.h
arch/s390/kernel/compat_linux.c
arch/s390/net/bpf_jit_comp.c
arch/sparc/lib/hweight.S
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/mm/gup.c
arch/sparc/net/bpf_jit_comp_64.c
arch/um/include/asm/Kbuild
arch/um/include/asm/mmu_context.h
arch/um/kernel/trap.c
arch/unicore32/include/asm/mmu_context.h
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/pagetable.c
arch/x86/boot/compressed/pgtable_64.c [new file with mode: 0644]
arch/x86/boot/genimage.sh
arch/x86/crypto/salsa20_glue.c
arch/x86/entry/calling.h
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/cpu_entry_area.h [new file with mode: 0644]
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/espfix.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/hypervisor.h
arch/x86/include/asm/intel_ds.h [new file with mode: 0644]
arch/x86/include/asm/invpcid.h [new file with mode: 0644]
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/kdebug.h
arch/x86/include/asm/mmu.h
arch/x86/include/asm/mmu_context.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgalloc.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_32_types.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/pti.h [new file with mode: 0644]
arch/x86/include/asm/stacktrace.h
arch/x86/include/asm/suspend_32.h
arch/x86/include/asm/suspend_64.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unwind.h
arch/x86/include/asm/vsyscall.h
arch/x86/include/uapi/asm/processor-flags.h
arch/x86/kernel/asm-offsets.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/doublefault.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/head_64.S
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/ldt.c
arch/x86/kernel/paravirt_patch_64.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tls.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/delay.c
arch/x86/lib/x86-opcode-map.txt
arch/x86/mm/Makefile
arch/x86/mm/cpu_entry_area.c [new file with mode: 0644]
arch/x86/mm/debug_pagetables.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/ioremap.c
arch/x86/mm/kasan_init_64.c
arch/x86/mm/kmmio.c
arch/x86/mm/pgtable.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/pti.c [new file with mode: 0644]
arch/x86/mm/tlb.c
arch/x86/pci/fixup.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/power/cpu.c
arch/x86/xen/apic.c
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
block/bio.c
block/blk-map.c
block/blk-throttle.c
block/bounce.c
block/kyber-iosched.c
crypto/af_alg.c
crypto/algif_aead.c
crypto/algif_skcipher.c
crypto/hmac.c
crypto/mcryptd.c
crypto/rsa_helper.c
crypto/salsa20_generic.c
crypto/shash.c
crypto/skcipher.c
drivers/acpi/apei/erst.c
drivers/acpi/cppc_acpi.c
drivers/acpi/device_pm.c
drivers/acpi/nfit/core.c
drivers/ata/ahci_mtk.c
drivers/ata/ahci_qoriq.c
drivers/ata/libata-core.c
drivers/ata/pata_pdc2027x.c
drivers/base/cacheinfo.c
drivers/base/power/main.c
drivers/block/null_blk.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ipmi/ipmi_si_parisc.c
drivers/char/ipmi/ipmi_si_pci.c
drivers/clk/clk.c
drivers/clk/sunxi/clk-sun9i-mmc.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/dma/at_hdmac.c
drivers/dma/dma-jz4740.c
drivers/dma/dmatest.c
drivers/dma/fsl-edma.c
drivers/dma/ioat/init.c
drivers/gpio/gpio-reg.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-devprop.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.h
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
drivers/gpu/drm/drm_connector.c
drivers/gpu/drm/drm_crtc_internal.h
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_lease.c
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/drm_mode_config.c
drivers/gpu/drm/drm_plane.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/i915/gvt/display.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_ddi.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_lpe_audio.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nouveau_drv.h
drivers/gpu/drm/nouveau/nouveau_fbcon.c
drivers/gpu/drm/nouveau/nouveau_mem.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/nouveau/nouveau_vmm.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/vc4/vc4_gem.c
drivers/gpu/drm/vc4/vc4_irq.c
drivers/hid/hid-core.c
drivers/hid/hid-cp2112.c
drivers/hid/hid-holtekff.c
drivers/hwmon/hwmon.c
drivers/hwtracing/stm/ftrace.c
drivers/i2c/busses/i2c-cht-wc.c
drivers/i2c/busses/i2c-piix4.c
drivers/i2c/busses/i2c-stm32.h
drivers/i2c/busses/i2c-stm32f4.c
drivers/i2c/busses/i2c-stm32f7.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/device.c
drivers/infiniband/core/iwcm.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/security.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/leds/led-core.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-mpath.c
drivers/md/dm-snap.c
drivers/md/dm-table.c
drivers/md/dm-thin.c
drivers/mfd/cros_ec_spi.c
drivers/mfd/twl4030-audio.c
drivers/mfd/twl6040.c
drivers/misc/eeprom/at24.c
drivers/misc/pti.c
drivers/mmc/core/card.h
drivers/mmc/core/mmc.c
drivers/mmc/core/quirks.h
drivers/mtd/mtdcore.c
drivers/mtd/nand/brcmnand/brcmnand.c
drivers/mtd/nand/gpio.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/net/dsa/mv88e6xxx/port.c
drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
drivers/net/ethernet/aquantia/atlantic/aq_hw.h
drivers/net/ethernet/aquantia/atlantic/aq_nic.c
drivers/net/ethernet/aquantia/atlantic/aq_nic.h
drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
drivers/net/ethernet/aquantia/atlantic/ver.h
drivers/net/ethernet/arc/emac.h
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/arc/emac_rockchip.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/broadcom/tg3.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/skge.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_selftest.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lag.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/qp.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
drivers/net/ethernet/mellanox/mlx5/core/vxlan.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/netronome/nfp/bpf/main.c
drivers/net/ethernet/netronome/nfp/bpf/main.h
drivers/net/ethernet/qualcomm/emac/emac-phy.c
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/hippi/rrunner.c
drivers/net/phy/at803x.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-xgene.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/meson-gxl.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/usb/qmi_wwan.c
drivers/net/vxlan.c
drivers/net/wireless/mac80211_hwsim.c
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/pfn_devs.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/of/of_mdio.c
drivers/parisc/lba_pci.c
drivers/pci/host/pcie-rcar.c
drivers/pci/pci-driver.c
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/renesas/Kconfig
drivers/phy/rockchip/phy-rockchip-typec.c
drivers/phy/tegra/xusb.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/platform/x86/asus-wireless.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/dell-wmi.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l3.h
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/qeth_l3_sys.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/aacraid/commsup.c
drivers/scsi/aacraid/linit.c
drivers/scsi/bfa/bfad_bsg.c
drivers/scsi/bfa/bfad_im.c
drivers/scsi/bfa/bfad_im.h
drivers/scsi/libfc/fc_lport.c
drivers/scsi/libsas/sas_expander.c
drivers/scsi/lpfc/lpfc_mem.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/scsi_debugfs.c
drivers/scsi/scsi_devinfo.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_scan.c
drivers/scsi/scsi_sysfs.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/sd.c
drivers/scsi/storvsc_drv.c
drivers/spi/spi-armada-3700.c
drivers/spi/spi-atmel.c
drivers/spi/spi-rspi.c
drivers/spi/spi-sun4i.c
drivers/spi/spi-xilinx.c
drivers/staging/android/ion/Kconfig
drivers/staging/android/ion/ion.c
drivers/staging/android/ion/ion_cma_heap.c
drivers/staging/ccree/ssi_hash.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c
drivers/staging/pi433/rf69.c
drivers/target/target_core_pscsi.c
drivers/tty/n_tty.c
drivers/usb/chipidea/ci_hdrc_msm.c
drivers/usb/core/config.c
drivers/usb/core/quirks.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/gadget.c
drivers/usb/dwc2/params.c
drivers/usb/dwc3/dwc3-of-simple.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/Kconfig
drivers/usb/gadget/legacy/Kconfig
drivers/usb/host/xhci-debugfs.c
drivers/usb/host/xhci-mem.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/musb/da8xx.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/option.c
drivers/usb/serial/qcserial.c
drivers/usb/storage/unusual_devs.h
drivers/usb/storage/unusual_uas.h
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/stub_main.c
drivers/usb/usbip/stub_rx.c
drivers/usb/usbip/stub_tx.c
drivers/usb/usbip/usbip_common.c
drivers/usb/usbip/usbip_common.h
drivers/usb/usbip/vhci_hcd.c
drivers/usb/usbip/vhci_rx.c
drivers/usb/usbip/vhci_sysfs.c
drivers/usb/usbip/vhci_tx.c
drivers/virtio/virtio_mmio.c
drivers/xen/Kconfig
drivers/xen/balloon.c
fs/autofs4/waitq.c
fs/ceph/mds_client.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cramfs/Kconfig
fs/dax.c
fs/exec.c
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/namei.c
fs/namespace.c
fs/nfs/client.c
fs/nfs/nfs4client.c
fs/nfs/write.c
fs/nfsd/auth.c
fs/overlayfs/Kconfig
fs/overlayfs/dir.c
fs/overlayfs/namei.c
fs/overlayfs/overlayfs.h
fs/overlayfs/ovl_entry.h
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/super.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/libxfs/xfs_iext_tree.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_rmap.c
fs/xfs/libxfs/xfs_rmap.h
fs/xfs/scrub/scrub.c
fs/xfs/scrub/trace.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_iomap.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.c
include/asm-generic/mm_hooks.h
include/asm-generic/pgtable.h
include/crypto/internal/hash.h
include/crypto/mcryptd.h
include/drm/drm_connector.h
include/drm/drm_edid.h
include/drm/drm_mode_config.h
include/kvm/arm_arch_timer.h
include/linux/bio.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/bpf_verifier.h
include/linux/compiler.h
include/linux/completion.h
include/linux/cred.h
include/linux/idr.h
include/linux/intel-pti.h [new file with mode: 0644]
include/linux/ipv6.h
include/linux/lockdep.h
include/linux/mfd/rtsx_pci.h
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/linux/oom.h
include/linux/pci.h
include/linux/pm.h
include/linux/pti.h
include/linux/ptr_ring.h
include/linux/rbtree.h
include/linux/rwlock_types.h
include/linux/sched.h
include/linux/sched/coredump.h
include/linux/spi/spi.h
include/linux/spinlock.h
include/linux/spinlock_types.h
include/linux/string.h
include/linux/tick.h
include/linux/trace.h
include/net/cfg80211.h
include/net/gue.h
include/net/ip.h
include/net/pkt_cls.h
include/net/sch_generic.h
include/net/sock.h
include/net/xfrm.h
include/trace/events/clk.h
include/trace/events/kvm.h
include/trace/events/preemptirq.h
include/trace/events/tcp.h
include/uapi/linux/pkt_sched.h
include/uapi/linux/rtnetlink.h
include/xen/balloon.h
init/main.c
kernel/bpf/hashtab.c
kernel/bpf/verifier.c
kernel/cgroup/debug.c
kernel/cgroup/stat.c
kernel/exit.c
kernel/fork.c
kernel/groups.c
kernel/kcov.c
kernel/locking/lockdep.c
kernel/locking/spinlock.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/rt.c
kernel/time/posix-timers.c
kernel/time/tick-sched.c
kernel/trace/Kconfig
kernel/trace/bpf_trace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_stack.c
kernel/uid16.c
kernel/workqueue.c
lib/Kconfig.debug
lib/kobject_uevent.c
lib/rbtree.c
lib/test_bpf.c
mm/backing-dev.c
mm/early_ioremap.c
mm/frame_vector.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/kmemleak.c
mm/memory.c
mm/mmap.c
mm/oom_kill.c
mm/page_alloc.c
mm/percpu.c
mm/slab.c
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/fragmentation.c
net/batman-adv/tp_meter.c
net/bridge/br_netlink.c
net/core/dev.c
net/core/net_namespace.c
net/core/netprio_cgroup.c
net/core/skbuff.c
net/dsa/slave.c
net/ipv4/devinet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_semantics.c
net/ipv4/igmp.c
net/ipv4/ip_gre.c
net/ipv4/ip_tunnel.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/raw.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv4/xfrm4_input.c
net/ipv6/af_inet6.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_MASQUERADE.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/xfrm6_input.c
net/mac80211/ht.c
net/netfilter/nf_conntrack_h323_asn1.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_exthdr.c
net/netfilter/x_tables.c
net/netfilter/xt_bpf.c
net/netfilter/xt_osf.c
net/netlink/af_netlink.c
net/openvswitch/flow.c
net/rds/send.c
net/sched/act_meta_mark.c
net/sched/act_meta_skbtcindex.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/cls_u32.c
net/sched/sch_api.c
net/sched/sch_generic.c
net/sched/sch_ingress.c
net/sched/sch_red.c
net/sctp/debug.c
net/sctp/socket.c
net/sctp/ulpqueue.c
net/strparser/strparser.c
net/sunrpc/auth_gss/gss_rpc_xdr.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/svcauth_unix.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/tipc/bearer.c
net/tipc/group.c
net/tipc/monitor.c
net/tipc/socket.c
net/wireless/Makefile
net/wireless/certs/sforshee.hex [new file with mode: 0644]
net/wireless/certs/sforshee.x509 [deleted file]
net/wireless/nl80211.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
scripts/checkpatch.pl
scripts/faddr2line
security/Kconfig
sound/core/rawmidi.c
sound/hda/hdac_i915.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp-pcm-dma.c
sound/soc/atmel/Kconfig
sound/soc/codecs/da7218.c
sound/soc/codecs/msm8916-wcd-analog.c
sound/soc/codecs/msm8916-wcd-digital.c
sound/soc/codecs/nau8825.c
sound/soc/codecs/rt5514-spi.c
sound/soc/codecs/rt5514.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5663.c
sound/soc/codecs/rt5663.h
sound/soc/codecs/tlv320aic31xx.h
sound/soc/codecs/twl4030.c
sound/soc/codecs/wm_adsp.c
sound/soc/fsl/fsl_asrc.h
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/boards/kbl_rt5663_max98927.c
sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c
sound/soc/intel/skylake/skl-nhlt.c
sound/soc/intel/skylake/skl-topology.c
sound/soc/rockchip/rockchip_spdif.c
sound/soc/sh/rcar/adg.c
sound/soc/sh/rcar/core.c
sound/soc/sh/rcar/dma.c
sound/soc/sh/rcar/ssi.c
sound/soc/sh/rcar/ssiu.c
sound/usb/mixer.c
sound/usb/quirks.c
tools/arch/s390/include/uapi/asm/bpf_perf_event.h
tools/arch/x86/include/asm/cpufeatures.h
tools/bpf/bpftool/map.c
tools/bpf/bpftool/prog.c
tools/include/linux/compiler.h
tools/include/linux/lockdep.h
tools/include/uapi/asm/bpf_perf_event.h [new file with mode: 0644]
tools/include/uapi/linux/kvm.h
tools/kvm/kvm_stat/kvm_stat
tools/kvm/kvm_stat/kvm_stat.txt
tools/objtool/arch/x86/lib/x86-opcode-map.txt
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
tools/perf/util/mmap.h
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/net/config
tools/testing/selftests/x86/ldt_gdt.c
tools/usb/usbip/libsrc/vhci_driver.c
tools/usb/usbip/src/utils.c
tools/virtio/ringtest/ptr_ring.c
tools/vm/slabinfo-gnuplot.sh
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmio.c
virt/kvm/arm/mmu.c

index 6571fbfdb2a1527c25b3a01e9c4228c84adce639..e49311d5350496b1db6e106634da7a09b58fc4a3 100644 (file)
                        steal time is computed, but won't influence scheduler
                        behaviour
 
+       nopti           [X86-64] Disable kernel page table isolation
+
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
        pt.             [PARIDE]
                        See Documentation/blockdev/paride.txt.
 
+       pti=            [X86_64]
+                       Control user/kernel address space isolation:
+                       on - enable
+                       off - disable
+                       auto - default setting
+
        pty.legacy_count=
                        [KNL] Number of legacy pty's. Overwrites compiled-in
                        default number.
index 304bf22bb83cc0ec8dfbbcf2a48b206ecb781afb..fc1c884fea10497357f889b11e33c6d323fecf55 100644 (file)
@@ -75,3 +75,4 @@ stable kernels.
 | Qualcomm Tech. | Falkor v1       | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 | Qualcomm Tech. | Falkor v1       | E1009           | QCOM_FALKOR_ERRATUM_1009    |
 | Qualcomm Tech. | QDF2400 ITS     | E0065           | QCOM_QDF2400_ERRATUM_0065   |
+| Qualcomm Tech. | Falkor v{1,2}   | E1041           | QCOM_FALKOR_ERRATUM_1041    |
index 779211fbb69ffac450f22b0ad6864c7c6c2bd98f..2cddab7efb20df0dcf07c4d7d64a5611138a8d7c 100644 (file)
@@ -898,6 +898,13 @@ controller implements weight and absolute bandwidth limit models for
 normal scheduling policy and absolute bandwidth allocation model for
 realtime scheduling policy.
 
+WARNING: cgroup2 doesn't yet support control of realtime processes and
+the cpu controller can only be enabled when all RT processes are in
+the root cgroup.  Be aware that system management software may already
+have placed RT processes into nonroot cgroups during the system boot
+process, and these processes may need to be moved to the root cgroup
+before the cpu controller can be enabled.
+
 
 CPU Interface Files
 ~~~~~~~~~~~~~~~~~~~
index 376fa2f50e6bc9b41052928037acd4b3a382d380..956bb046e599d576e3f881b2901e0d369a3c9802 100644 (file)
@@ -13,7 +13,6 @@ Required properties:
                  at25df321a
                  at25df641
                  at26df081a
-                 en25s64
                  mr25h128
                  mr25h256
                  mr25h10
@@ -33,7 +32,6 @@ Required properties:
                  s25fl008k
                  s25fl064k
                  sst25vf040b
-                 sst25wf040b
                  m25p40
                  m25p80
                  m25p16
index 5ca5a709b6aa1989901eff6b9239f0bc71d0d272..3ab9dfef38d113523549e66cee20ad8db6e56842 100644 (file)
@@ -73,7 +73,7 @@ Example:
                compatible = "dlg,da7218";
                reg = <0x1a>;
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
                wakeup-source;
 
                VDD-supply = <&reg_audio>;
index cf61681826b675ad984dc17f6c1dbcc2e1b6fc35..5b54d2d045c355808bf0f58afba9ec7539ff4472 100644 (file)
@@ -77,7 +77,7 @@ Example:
                reg = <0x1a>;
 
                interrupt-parent = <&gpio6>;
-               interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
 
                VDD-supply = <&reg_audio>;
                VDDMIC-supply = <&reg_audio>;
index 5bf13960f7f4a3c826c10b1e15a618df82d82403..e3c48b20b1a691b37d0b425251a257c682a38eca 100644 (file)
@@ -12,24 +12,30 @@ Required properties:
   - "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
-- cs-gpios : Specifies the gpio pins to be used for chipselects.
 - clocks : Clock specifiers for both ipg and per clocks.
 - clock-names : Clock names should include both "ipg" and "per"
 See the clock consumer binding,
        Documentation/devicetree/bindings/clock/clock-bindings.txt
-- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
-               Documentation/devicetree/bindings/dma/dma.txt
-- dma-names: DMA request names should include "tx" and "rx" if present.
 
-Obsolete properties:
-- fsl,spi-num-chipselects : Contains the number of the chipselect
+Recommended properties:
+- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt.  While the native chip
+select lines can be used, they appear to always generate a pulse between each
+word of a transfer.  Most use cases will require GPIO based chip selects to
+generate a valid transaction.
 
 Optional properties:
+- num-cs :  Number of total chip selects, see spi-bus.txt.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: DMA request names, if present, should include "tx" and "rx".
 - fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
 controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
 the SPI_READY mode-flag needs to be set too.
 Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).
 
+Obsolete properties:
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+
 Example:
 
 ecspi@70010000 {
index 8caa60734647f70a777b8a568ba9f2dd99182fb8..e6a5f4912b6d4a4910ed1d2fc494fff304ab47ff 100644 (file)
@@ -156,6 +156,40 @@ handle it in two different ways:
    root of the overlay.  Finally the directory is moved to the new
    location.
 
+There are several ways to tune the "redirect_dir" feature.
+
+Kernel config options:
+
+- OVERLAY_FS_REDIRECT_DIR:
+    If this is enabled, then redirect_dir is turned on by  default.
+- OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW:
+    If this is enabled, then redirects are always followed by default. Enabling
+    this results in a less secure configuration.  Enable this option only when
+    worried about backward compatibility with kernels that have the redirect_dir
+    feature and follow redirects even if turned off.
+
+Module options (can also be changed through /sys/module/overlay/parameters/*):
+
+- "redirect_dir=BOOL":
+    See OVERLAY_FS_REDIRECT_DIR kernel config option above.
+- "redirect_always_follow=BOOL":
+    See OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW kernel config option above.
+- "redirect_max=NUM":
+    The maximum number of bytes in an absolute redirect (default is 256).
+
+Mount options:
+
+- "redirect_dir=on":
+    Redirects are enabled.
+- "redirect_dir=follow":
+    Redirects are not created, but followed.
+- "redirect_dir=off":
+    Redirects are not created and only followed if "redirect_always_follow"
+    feature is enabled in the kernel/module config.
+- "redirect_dir=nofollow":
+    Redirects are not created and not followed (equivalent to "redirect_dir=off"
+    if "redirect_always_follow" feature is not enabled).
+
 Non-directories
 ---------------
 
diff --git a/Documentation/locking/crossrelease.txt b/Documentation/locking/crossrelease.txt
deleted file mode 100644 (file)
index bdf1423..0000000
+++ /dev/null
@@ -1,874 +0,0 @@
-Crossrelease
-============
-
-Started by Byungchul Park <byungchul.park@lge.com>
-
-Contents:
-
- (*) Background
-
-     - What causes deadlock
-     - How lockdep works
-
- (*) Limitation
-
-     - Limit lockdep
-     - Pros from the limitation
-     - Cons from the limitation
-     - Relax the limitation
-
- (*) Crossrelease
-
-     - Introduce crossrelease
-     - Introduce commit
-
- (*) Implementation
-
-     - Data structures
-     - How crossrelease works
-
- (*) Optimizations
-
-     - Avoid duplication
-     - Lockless for hot paths
-
- (*) APPENDIX A: What lockdep does to work aggresively
-
- (*) APPENDIX B: How to avoid adding false dependencies
-
-
-==========
-Background
-==========
-
-What causes deadlock
---------------------
-
-A deadlock occurs when a context is waiting for an event to happen,
-which is impossible because another (or the) context who can trigger the
-event is also waiting for another (or the) event to happen, which is
-also impossible due to the same reason.
-
-For example:
-
-   A context going to trigger event C is waiting for event A to happen.
-   A context going to trigger event A is waiting for event B to happen.
-   A context going to trigger event B is waiting for event C to happen.
-
-A deadlock occurs when these three wait operations run at the same time,
-because event C cannot be triggered if event A does not happen, which in
-turn cannot be triggered if event B does not happen, which in turn
-cannot be triggered if event C does not happen. After all, no event can
-be triggered since any of them never meets its condition to wake up.
-
-A dependency might exist between two waiters and a deadlock might happen
-due to an incorrect releationship between dependencies. Thus, we must
-define what a dependency is first. A dependency exists between them if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-Each wait in the example creates its dependency like:
-
-   Event C depends on event A.
-   Event A depends on event B.
-   Event B depends on event C.
-
-   NOTE: Precisely speaking, a dependency is one between whether a
-   waiter for an event can be woken up and whether another waiter for
-   another event can be woken up. However from now on, we will describe
-   a dependency as if it's one between an event and another event for
-   simplicity.
-
-And they form circular dependencies like:
-
-    -> C -> A -> B -
-   /                \
-   \                /
-    ----------------
-
-   where 'A -> B' means that event A depends on event B.
-
-Such circular dependencies lead to a deadlock since no waiter can meet
-its condition to wake up as described.
-
-CONCLUSION
-
-Circular dependencies cause a deadlock.
-
-
-How lockdep works
------------------
-
-Lockdep tries to detect a deadlock by checking dependencies created by
-lock operations, acquire and release. Waiting for a lock corresponds to
-waiting for an event, and releasing a lock corresponds to triggering an
-event in the previous section.
-
-In short, lockdep does:
-
-   1. Detect a new dependency.
-   2. Add the dependency into a global graph.
-   3. Check if that makes dependencies circular.
-   4. Report a deadlock or its possibility if so.
-
-For example, consider a graph built by lockdep that looks like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-Lockdep will add a dependency into the graph on detection of a new
-dependency. For example, it will add a dependency 'E -> C' when a new
-dependency between lock E and lock C is detected. Then the graph will be:
-
-       A -> B -
-               \
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where A, B,..., E are different lock classes.
-
-This graph contains a subgraph which demonstrates circular dependencies:
-
-                -> E -
-               /      \
-    -> C -> D -        \
-   /                   /
-   \                  /
-    ------------------
-
-   where C, D and E are different lock classes.
-
-This is the condition under which a deadlock might occur. Lockdep
-reports it on detection after adding a new dependency. This is the way
-how lockdep works.
-
-CONCLUSION
-
-Lockdep detects a deadlock or its possibility by checking if circular
-dependencies were created after adding each new dependency.
-
-
-==========
-Limitation
-==========
-
-Limit lockdep
--------------
-
-Limiting lockdep to work on only typical locks e.g. spin locks and
-mutexes, which are released within the acquire context, the
-implementation becomes simple but its capacity for detection becomes
-limited. Let's check pros and cons in next section.
-
-
-Pros from the limitation
-------------------------
-
-Given the limitation, when acquiring a lock, locks in a held_locks
-cannot be released if the context cannot acquire it so has to wait to
-acquire it, which means all waiters for the locks in the held_locks are
-stuck. It's an exact case to create dependencies between each lock in
-the held_locks and the lock to acquire.
-
-For example:
-
-   CONTEXT X
-   ---------
-   acquire A
-   acquire B /* Add a dependency 'A -> B' */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-When acquiring lock A, the held_locks of CONTEXT X is empty thus no
-dependency is added. But when acquiring lock B, lockdep detects and adds
-a new dependency 'A -> B' between lock A in the held_locks and lock B.
-They can be simply added whenever acquiring each lock.
-
-And data required by lockdep exists in a local structure, held_locks
-embedded in task_struct. Forcing to access the data within the context,
-lockdep can avoid racy problems without explicit locks while handling
-the local data.
-
-Lastly, lockdep only needs to keep locks currently being held, to build
-a dependency graph. However, relaxing the limitation, it needs to keep
-even locks already released, because a decision whether they created
-dependencies might be long-deferred.
-
-To sum up, we can expect several advantages from the limitation:
-
-   1. Lockdep can easily identify a dependency when acquiring a lock.
-   2. Races are avoidable while accessing local locks in a held_locks.
-   3. Lockdep only needs to keep locks currently being held.
-
-CONCLUSION
-
-Given the limitation, the implementation becomes simple and efficient.
-
-
-Cons from the limitation
-------------------------
-
-Given the limitation, lockdep is applicable only to typical locks. For
-example, page locks for page access or completions for synchronization
-cannot work with lockdep.
-
-Can we detect deadlocks below, under the limitation?
-
-Example 1:
-
-   CONTEXT X      CONTEXT Y       CONTEXT Z
-   ---------      ---------       ----------
-                  mutex_lock A
-   lock_page B
-                  lock_page B
-                                  mutex_lock A /* DEADLOCK */
-                                  unlock_page B held by X
-                  unlock_page B
-                  mutex_unlock A
-                                  mutex_unlock A
-
-   where A and B are different lock classes.
-
-No, we cannot.
-
-Example 2:
-
-   CONTEXT X              CONTEXT Y
-   ---------              ---------
-                          mutex_lock A
-   mutex_lock A
-                          wait_for_complete B /* DEADLOCK */
-   complete B
-                          mutex_unlock A
-   mutex_unlock A
-
-   where A is a lock class and B is a completion variable.
-
-No, we cannot.
-
-CONCLUSION
-
-Given the limitation, lockdep cannot detect a deadlock or its
-possibility caused by page locks or completions.
-
-
-Relax the limitation
---------------------
-
-Under the limitation, things to create dependencies are limited to
-typical locks. However, synchronization primitives like page locks and
-completions, which are allowed to be released in any context, also
-create dependencies and can cause a deadlock. So lockdep should track
-these locks to do a better job. We have to relax the limitation for
-these locks to work with lockdep.
-
-Detecting dependencies is very important for lockdep to work because
-adding a dependency means adding an opportunity to check whether it
-causes a deadlock. The more lockdep adds dependencies, the more it
-thoroughly works. Thus Lockdep has to do its best to detect and add as
-many true dependencies into a graph as possible.
-
-For example, considering only typical locks, lockdep builds a graph like:
-
-   A -> B -
-           \
-            -> E
-           /
-   C -> D -
-
-   where A, B,..., E are different lock classes.
-
-On the other hand, under the relaxation, additional dependencies might
-be created and added. Assuming additional 'FX -> C' and 'E -> GX' are
-added thanks to the relaxation, the graph will be:
-
-         A -> B -
-                 \
-                  -> E -> GX
-                 /
-   FX -> C -> D -
-
-   where A, B,..., E, FX and GX are different lock classes, and a suffix
-   'X' is added on non-typical locks.
-
-The latter graph gives us more chances to check circular dependencies
-than the former. However, it might suffer performance degradation since
-relaxing the limitation, with which design and implementation of lockdep
-can be efficient, might introduce inefficiency inevitably. So lockdep
-should provide two options, strong detection and efficient detection.
-
-Choosing efficient detection:
-
-   Lockdep works with only locks restricted to be released within the
-   acquire context. However, lockdep works efficiently.
-
-Choosing strong detection:
-
-   Lockdep works with all synchronization primitives. However, lockdep
-   suffers performance degradation.
-
-CONCLUSION
-
-Relaxing the limitation, lockdep can add additional dependencies giving
-additional opportunities to check circular dependencies.
-
-
-============
-Crossrelease
-============
-
-Introduce crossrelease
-----------------------
-
-In order to allow lockdep to handle additional dependencies by what
-might be released in any context, namely 'crosslock', we have to be able
-to identify those created by crosslocks. The proposed 'crossrelease'
-feature provoides a way to do that.
-
-Crossrelease feature has to do:
-
-   1. Identify dependencies created by crosslocks.
-   2. Add the dependencies into a dependency graph.
-
-That's all. Once a meaningful dependency is added into graph, then
-lockdep would work with the graph as it did. The most important thing
-crossrelease feature has to do is to correctly identify and add true
-dependencies into the global graph.
-
-A dependency e.g. 'A -> B' can be identified only in the A's release
-context because a decision required to identify the dependency can be
-made only in the release context. That is to decide whether A can be
-released so that a waiter for A can be woken up. It cannot be made in
-other than the A's release context.
-
-It's no matter for typical locks because each acquire context is same as
-its release context, thus lockdep can decide whether a lock can be
-released in the acquire context. However for crosslocks, lockdep cannot
-make the decision in the acquire context but has to wait until the
-release context is identified.
-
-Therefore, deadlocks by crosslocks cannot be detected just when it
-happens, because those cannot be identified until the crosslocks are
-released. However, deadlock possibilities can be detected and it's very
-worth. See 'APPENDIX A' section to check why.
-
-CONCLUSION
-
-Using crossrelease feature, lockdep can work with what might be released
-in any context, namely crosslock.
-
-
-Introduce commit
-----------------
-
-Since crossrelease defers the work adding true dependencies of
-crosslocks until they are actually released, crossrelease has to queue
-all acquisitions which might create dependencies with the crosslocks.
-Then it identifies dependencies using the queued data in batches at a
-proper time. We call it 'commit'.
-
-There are four types of dependencies:
-
-1. TT type: 'typical lock A -> typical lock B'
-
-   Just when acquiring B, lockdep can see it's in the A's release
-   context. So the dependency between A and B can be identified
-   immediately. Commit is unnecessary.
-
-2. TC type: 'typical lock A -> crosslock BX'
-
-   Just when acquiring BX, lockdep can see it's in the A's release
-   context. So the dependency between A and BX can be identified
-   immediately. Commit is unnecessary, too.
-
-3. CT type: 'crosslock AX -> typical lock B'
-
-   When acquiring B, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-
-4. CC type: 'crosslock AX -> crosslock BX'
-
-   When acquiring BX, lockdep cannot identify the dependency because
-   there's no way to know if it's in the AX's release context. It has
-   to wait until the decision can be made. Commit is necessary.
-   But, handling CC type is not implemented yet. It's a future work.
-
-Lockdep can work without commit for typical locks, but commit step is
-necessary once crosslocks are involved. Introducing commit, lockdep
-performs three steps. What lockdep does in each step is:
-
-1. Acquisition: For typical locks, lockdep does what it originally did
-   and queues the lock so that CT type dependencies can be checked using
-   it at the commit step. For crosslocks, it saves data which will be
-   used at the commit step and increases a reference count for it.
-
-2. Commit: No action is reauired for typical locks. For crosslocks,
-   lockdep adds CT type dependencies using the data saved at the
-   acquisition step.
-
-3. Release: No changes are required for typical locks. When a crosslock
-   is released, it decreases a reference count for it.
-
-CONCLUSION
-
-Crossrelease introduces commit step to handle dependencies of crosslocks
-in batches at a proper time.
-
-
-==============
-Implementation
-==============
-
-Data structures
----------------
-
-Crossrelease introduces two main data structures.
-
-1. hist_lock
-
-   This is an array embedded in task_struct, for keeping lock history so
-   that dependencies can be added using them at the commit step. Since
-   it's local data, it can be accessed locklessly in the owner context.
-   The array is filled at the acquisition step and consumed at the
-   commit step. And it's managed in circular manner.
-
-2. cross_lock
-
-   One per lockdep_map exists. This is for keeping data of crosslocks
-   and used at the commit step.
-
-
-How crossrelease works
-----------------------
-
-It's the key of how crossrelease works, to defer necessary works to an
-appropriate point in time and perform in at once at the commit step.
-Let's take a look with examples step by step, starting from how lockdep
-works without crossrelease for typical locks.
-
-   acquire A /* Push A onto held_locks */
-   acquire B /* Push B onto held_locks and add 'A -> B' */
-   acquire C /* Push C onto held_locks and add 'B -> C' */
-   release C /* Pop C from held_locks */
-   release B /* Pop B from held_locks */
-   release A /* Pop A from held_locks */
-
-   where A, B and C are different lock classes.
-
-   NOTE: This document assumes that readers already understand how
-   lockdep works without crossrelease thus omits details. But there's
-   one thing to note. Lockdep pretends to pop a lock from held_locks
-   when releasing it. But it's subtly different from the original pop
-   operation because lockdep allows other than the top to be poped.
-
-In this case, lockdep adds 'the top of held_locks -> the lock to acquire'
-dependency every time acquiring a lock.
-
-After adding 'A -> B', a dependency graph will be:
-
-   A -> B
-
-   where A and B are different lock classes.
-
-And after adding 'B -> C', the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-Let's performs commit step even for typical locks to add dependencies.
-Of course, commit step is not necessary for them, however, it would work
-well because this is a more general way.
-
-   acquire A
-   /*
-    * Queue A into hist_locks
-    *
-    * In hist_locks: A
-    * In graph: Empty
-    */
-
-   acquire B
-   /*
-    * Queue B into hist_locks
-    *
-    * In hist_locks: A, B
-    * In graph: Empty
-    */
-
-   acquire C
-   /*
-    * Queue C into hist_locks
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   commit C
-   /*
-    * Add 'C -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire C: Nothing
-    *
-    * In hist_locks: A, B, C
-    * In graph: Empty
-    */
-
-   release C
-
-   commit B
-   /*
-    * Add 'B -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire B: C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C'
-    */
-
-   release B
-
-   commit A
-   /*
-    * Add 'A -> ?'
-    * Answer the following to decide '?'
-    * What has been queued since acquire A: B, C
-    *
-    * In hist_locks: A, B, C
-    * In graph: 'B -> C', 'A -> B', 'A -> C'
-    */
-
-   release A
-
-   where A, B and C are different lock classes.
-
-In this case, dependencies are added at the commit step as described.
-
-After commits for A, B and C, the graph will be:
-
-   A -> B -> C
-
-   where A, B and C are different lock classes.
-
-   NOTE: A dependency 'A -> C' is optimized out.
-
-We can see the former graph built without commit step is same as the
-latter graph built using commit steps. Of course the former way leads to
-earlier finish for building the graph, which means we can detect a
-deadlock or its possibility sooner. So the former way would be prefered
-when possible. But we cannot avoid using the latter way for crosslocks.
-
-Let's look at how commit steps work for crosslocks. In this case, the
-commit step is performed only on crosslock AX as real. And it assumes
-that the AX release context is different from the AX acquire context.
-
-   BX RELEASE CONTEXT             BX ACQUIRE CONTEXT
-   ------------------             ------------------
-                                  acquire A
-                                  /*
-                                   * Push A onto held_locks
-                                   * Queue A into hist_locks
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A
-                                   * In graph: Empty
-                                   */
-
-                                  acquire BX
-                                  /*
-                                   * Add 'the top of held_locks -> BX'
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A
-                                   * In graph: 'A -> BX'
-                                   */
-
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-   It must be guaranteed that the following operations are seen after
-   acquiring BX globally. It can be done by things like barrier.
-   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-   acquire C
-   /*
-    * Push C onto held_locks
-    * Queue C into hist_locks
-    *
-    * In held_locks: C
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-
-   release C
-   /*
-    * Pop C from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: C
-    * In graph: 'A -> BX'
-    */
-                                  acquire D
-                                  /*
-                                   * Push D onto held_locks
-                                   * Queue D into hist_locks
-                                   * Add 'the top of held_locks -> D'
-                                   *
-                                   * In held_locks: A, D
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D'
-                                   */
-   acquire E
-   /*
-    * Push E onto held_locks
-    * Queue E into hist_locks
-    *
-    * In held_locks: E
-    * In hist_locks: C, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-
-   release E
-   /*
-    * Pop E from held_locks
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D'
-    */
-                                  release D
-                                  /*
-                                   * Pop D from held_locks
-                                   *
-                                   * In held_locks: A
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D'
-                                   */
-   commit BX
-   /*
-    * Add 'BX -> ?'
-    * What has been queued since acquire BX: C, E
-    *
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-
-   release BX
-   /*
-    * In held_locks: Empty
-    * In hist_locks: D, E
-    * In graph: 'A -> BX', 'A -> D',
-    *           'BX -> C', 'BX -> E'
-    */
-                                  release A
-                                  /*
-                                   * Pop A from held_locks
-                                   *
-                                   * In held_locks: Empty
-                                   * In hist_locks: A, D
-                                   * In graph: 'A -> BX', 'A -> D',
-                                   *           'BX -> C', 'BX -> E'
-                                   */
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Crossrelease considers all acquisitions after acqiuring BX are
-candidates which might create dependencies with BX. True dependencies
-will be determined when identifying the release context of BX. Meanwhile,
-all typical locks are queued so that they can be used at the commit step.
-And then two dependencies 'BX -> C' and 'BX -> E' are added at the
-commit step when identifying the release context.
-
-The final graph will be, with crossrelease:
-
-               -> C
-              /
-       -> BX -
-      /       \
-   A -         -> E
-      \
-       -> D
-
-   where A, BX, C,..., E are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-However, the final graph will be, without crossrelease:
-
-   A -> D
-
-   where A and D are different lock classes.
-
-The former graph has three more dependencies, 'A -> BX', 'BX -> C' and
-'BX -> E' giving additional opportunities to check if they cause
-deadlocks. This way lockdep can detect a deadlock or its possibility
-caused by crosslocks.
-
-CONCLUSION
-
-We checked how crossrelease works with several examples.
-
-
-=============
-Optimizations
-=============
-
-Avoid duplication
------------------
-
-Crossrelease feature uses a cache like what lockdep already uses for
-dependency chains, but this time it's for caching CT type dependencies.
-Once that dependency is cached, the same will never be added again.
-
-
-Lockless for hot paths
-----------------------
-
-To keep all locks for later use at the commit step, crossrelease adopts
-a local array embedded in task_struct, which makes access to the data
-lockless by forcing it to happen only within the owner context. It's
-like how lockdep handles held_locks. Lockless implmentation is important
-since typical locks are very frequently acquired and released.
-
-
-=================================================
-APPENDIX A: What lockdep does to work aggresively
-=================================================
-
-A deadlock actually occurs when all wait operations creating circular
-dependencies run at the same time. Even though they don't, a potential
-deadlock exists if the problematic dependencies exist. Thus it's
-meaningful to detect not only an actual deadlock but also its potential
-possibility. The latter is rather valuable. When a deadlock occurs
-actually, we can identify what happens in the system by some means or
-other even without lockdep. However, there's no way to detect possiblity
-without lockdep unless the whole code is parsed in head. It's terrible.
-Lockdep does the both, and crossrelease only focuses on the latter.
-
-Whether or not a deadlock actually occurs depends on several factors.
-For example, what order contexts are switched in is a factor. Assuming
-circular dependencies exist, a deadlock would occur when contexts are
-switched so that all wait operations creating the dependencies run
-simultaneously. Thus to detect a deadlock possibility even in the case
-that it has not occured yet, lockdep should consider all possible
-combinations of dependencies, trying to:
-
-1. Use a global dependency graph.
-
-   Lockdep combines all dependencies into one global graph and uses them,
-   regardless of which context generates them or what order contexts are
-   switched in. Aggregated dependencies are only considered so they are
-   prone to be circular if a problem exists.
-
-2. Check dependencies between classes instead of instances.
-
-   What actually causes a deadlock are instances of lock. However,
-   lockdep checks dependencies between classes instead of instances.
-   This way lockdep can detect a deadlock which has not happened but
-   might happen in future by others but the same class.
-
-3. Assume all acquisitions lead to waiting.
-
-   Although locks might be acquired without waiting which is essential
-   to create dependencies, lockdep assumes all acquisitions lead to
-   waiting since it might be true some time or another.
-
-CONCLUSION
-
-Lockdep detects not only an actual deadlock but also its possibility,
-and the latter is more valuable.
-
-
-==================================================
-APPENDIX B: How to avoid adding false dependencies
-==================================================
-
-Remind what a dependency is. A dependency exists if:
-
-   1. There are two waiters waiting for each event at a given time.
-   2. The only way to wake up each waiter is to trigger its event.
-   3. Whether one can be woken up depends on whether the other can.
-
-For example:
-
-   acquire A
-   acquire B /* A dependency 'A -> B' exists */
-   release B
-   release A
-
-   where A and B are different lock classes.
-
-A depedency 'A -> B' exists since:
-
-   1. A waiter for A and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for A can be woken up depends on whether the
-      other can. IOW, TASK X cannot release A if it fails to acquire B.
-
-For another example:
-
-   TASK X                         TASK Y
-   ------                         ------
-                                  acquire AX
-   acquire B /* A dependency 'AX -> B' exists */
-   release B
-   release AX held by Y
-
-   where AX and B are different lock classes, and a suffix 'X' is added
-   on crosslocks.
-
-Even in this case involving crosslocks, the same rule can be applied. A
-depedency 'AX -> B' exists since:
-
-   1. A waiter for AX and a waiter for B might exist when acquiring B.
-   2. Only way to wake up each is to release what it waits for.
-   3. Whether the waiter for AX can be woken up depends on whether the
-      other can. IOW, TASK X cannot release AX if it fails to acquire B.
-
-Let's take a look at more complicated example:
-
-   TASK X                         TASK Y
-   ------                         ------
-   acquire B
-   release B
-   fork Y
-                                  acquire AX
-   acquire C /* A dependency 'AX -> C' exists */
-   release C
-   release AX held by Y
-
-   where AX, B and C are different lock classes, and a suffix 'X' is
-   added on crosslocks.
-
-Does a dependency 'AX -> B' exist? Nope.
-
-Two waiters are essential to create a dependency. However, waiters for
-AX and B to create 'AX -> B' cannot exist at the same time in this
-example. Thus the dependency 'AX -> B' cannot be created.
-
-It would be ideal if the full set of true ones can be considered. But
-we can ensure nothing but what actually happened. Relying on what
-actually happens at runtime, we can anyway add only true ones, though
-they might be a subset of true ones. It's similar to how lockdep works
-for typical locks. There might be more true dependencies than what
-lockdep has detected in runtime. Lockdep has no choice but to rely on
-what actually happens. Crossrelease also relies on it.
-
-CONCLUSION
-
-Relying on what actually happens, lockdep can avoid adding false
-dependencies.
index 89fff7d611ccb533a5c3d375bc94fecf3c2e0687..0b3a1148f9f0414558ed0537b4219225162ccc3a 100644 (file)
@@ -98,5 +98,25 @@ request is made for a page in an old zpool, it is uncompressed using its
 original compressor.  Once all pages are removed from an old zpool, the zpool
 and its compressor are freed.
 
+Some of the pages in zswap are same-value filled pages (i.e. contents of the
+page have same value or repetitive pattern). These pages include zero-filled
+pages and they are handled differently. During store operation, a page is
+checked if it is a same-value filled page before compressing it. If true, the
+compressed length of the page is set to zero and the pattern or same-filled
+value is stored.
+
+Same-value filled pages identification feature is enabled by default and can be
+disabled at boot time by setting the "same_filled_pages_enabled" attribute to 0,
+e.g. zswap.same_filled_pages_enabled=0. It can also be enabled and disabled at
+runtime using the sysfs "same_filled_pages_enabled" attribute, e.g.
+
+echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
+
+When zswap same-filled page identification is disabled at runtime, it will stop
+checking for the same-value filled pages during store operation. However, the
+existing pages which are marked as same-value filled pages remain stored
+unchanged in zswap until they are either loaded or invalidated.
+
 A debugfs interface is provided for various statistic about pool size, number
-of pages stored, and various counters for the reasons pages are rejected.
+of pages stored, same-value filled pages and various counters for the reasons
+pages are rejected.
index 3448e675b4623ce81b5e0bc1116c52a12c411801..ad41b3813f0a3a3bd5abb32532b42c9b820bbe27 100644 (file)
@@ -1,6 +1,4 @@
 
-<previous description obsolete, deleted>
-
 Virtual memory map with 4 level page tables:
 
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
@@ -14,13 +12,16 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
+fffffe0000000000 - fffffe7fffffffff (=39 bits) LDT remap for PTI
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space (variable)
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space (variable)
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Virtual memory map with 5 level page tables:
@@ -29,26 +30,29 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
+fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
-ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
-ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
+ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
-through to the most-significant implemented bit are set to either all ones
-or all zero. This causes hole between user space and kernel addresses.
+through to the most-significant implemented bit are sign extended.
+This causes hole between user space and kernel addresses if you interpret them
+as unsigned.
 
 The direct mapping covers all memory in the system up to the highest
 memory address (this means in some cases it can also include PCI memory
@@ -58,9 +62,6 @@ vmalloc space is lazily synchronized into the different PML4/PML5 pages of
 the processes using the page fault handler, with init_top_pgt as
 reference.
 
-Current X86-64 implementations support up to 46 bits of address space (64 TB),
-which is our current limit. This expands into MBZ space in the page tables.
-
 We map EFI runtime services in the 'efi_pgd' PGD in a 64Gb large virtual
 memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
@@ -72,5 +73,3 @@ following fixmap section.
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
-
--Andi Kleen, Jul 2004
index 5da966e19e8a958a9b7e5de4ceb88a0371e27559..b46c9cea5ae5b90227687436eb64130d05ae10ca 100644 (file)
@@ -2621,24 +2621,22 @@ F:      fs/bfs/
 F:     include/uapi/linux/bfs_fs.h
 
 BLACKFIN ARCHITECTURE
-M:     Steven Miao <realmz6@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 T:     git git://git.code.sf.net/p/adi-linux/code
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     arch/blackfin/
 
 BLACKFIN EMAC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/net/ethernet/adi/
 
 BLACKFIN MEDIA DRIVER
-M:     Scott Jiang <scott.jiang.linux@gmail.com>
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org/
-S:     Supported
+S:     Orphan
 F:     drivers/media/platform/blackfin/
 F:     drivers/media/i2c/adv7183*
 F:     drivers/media/i2c/vs6624*
@@ -2646,25 +2644,25 @@ F:      drivers/media/i2c/vs6624*
 BLACKFIN RTC DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/rtc/rtc-bfin.c
 
 BLACKFIN SDH DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/mmc/host/bfin_sdh.c
 
 BLACKFIN SERIAL DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/tty/serial/bfin_uart.c
 
 BLACKFIN WATCHDOG DRIVER
 L:     adi-buildroot-devel@lists.sourceforge.net (moderated for non-subscribers)
 W:     http://blackfin.uclinux.org
-S:     Supported
+S:     Orphan
 F:     drivers/watchdog/bfin_wdt.c
 
 BLINKM RGB LED DRIVER
@@ -5431,7 +5429,7 @@ F:        drivers/media/tuners/fc2580*
 
 FCOE SUBSYSTEM (libfc, libfcoe, fcoe)
 M:     Johannes Thumshirn <jth@kernel.org>
-L:     fcoe-devel@open-fcoe.org
+L:     linux-scsi@vger.kernel.org
 W:     www.Open-FCoE.org
 S:     Supported
 F:     drivers/scsi/libfc/
@@ -13117,6 +13115,7 @@ F:      drivers/dma/dw/
 
 SYNOPSYS DESIGNWARE ENTERPRISE ETHERNET DRIVER
 M:     Jie Deng <jiedeng@synopsys.com>
+M:     Jose Abreu <Jose.Abreu@synopsys.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/synopsys/
index 3f4d157add54018fbea707a8129f2326b463dec6..92b74bcd3c2a28870c4368bfa26c60e9e510f3e5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 15
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc5
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
@@ -789,6 +789,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS  += $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
index fbb3758ca2e3e77d4a65e47bb9229429305a0015..4b8edc8982cf156931177b0ecce0f6b9329afce3 100644 (file)
                                        switch0port10: port@10 {
                                                reg = <10>;
                                                label = "dsa";
-                                               phy-mode = "xgmii";
+                                               phy-mode = "xaui";
                                                link = <&switch1port10>;
                                        };
                                };
                                        switch1port10: port@10 {
                                                reg = <10>;
                                                label = "dsa";
-                                               phy-mode = "xgmii";
+                                               phy-mode = "xaui";
                                                link = <&switch0port10>;
                                        };
                                };
index 1712f132b80d2402d94d72ea974a0c3326fa2f52..b83fdc06286a64ece150fb7e419bc587e47c3e34 100644 (file)
                .pushsection .text.fixup,"ax"
                .align  4
 9001:          mov     r4, #-EFAULT
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+               ldr     r5, [sp, #9*4]          @ *err_ptr
+#else
                ldr     r5, [sp, #8*4]          @ *err_ptr
+#endif
                str     r4, [r5]
                ldmia   sp, {r1, r2}            @ retrieve dst, len
                add     r2, r2, r1
index a93339f5178f2eff247144eb9244c077225094bc..c9a7e9e1414f344c9dfd515600e3e4378bf61d81 100644 (file)
@@ -557,7 +557,6 @@ config QCOM_QDF2400_ERRATUM_0065
 
          If unsure, say Y.
 
-
 config SOCIONEXT_SYNQUACER_PREITS
        bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
        default y
@@ -576,6 +575,17 @@ config HISILICON_ERRATUM_161600802
          a 128kB offset to be applied to the target address in this commands.
 
          If unsure, say Y.
+
+config QCOM_FALKOR_ERRATUM_E1041
+       bool "Falkor E1041: Speculative instruction fetches might cause errant memory access"
+       default y
+       help
+         Falkor CPU may speculatively fetch instructions from an improper
+         memory location when MMU translation is changed from SCTLR_ELn[M]=1
+         to SCTLR_ELn[M]=0. Prefix an ISB instruction to fix the problem.
+
+         If unsure, say Y.
+
 endmenu
 
 
index aef72d886677758c76d6b932c863893df7c67b53..8b168280976f25de43539ed1b4dbed9b952fcfde 100644 (file)
@@ -512,4 +512,14 @@ alternative_else_nop_endif
 #endif
        .endm
 
+/**
+ * Errata workaround prior to disable MMU. Insert an ISB immediately prior
+ * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
+ */
+       .macro pre_disable_mmu_workaround
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
+       isb
+#endif
+       .endm
+
 #endif /* __ASM_ASSEMBLER_H */
index ac67cfc2585a8af417405958779b792d60b06e6a..060e3a4008abd18e4a5aa48bb5b6fa1674a735c3 100644 (file)
@@ -60,6 +60,9 @@ enum ftr_type {
 #define FTR_VISIBLE    true    /* Feature visible to the user space */
 #define FTR_HIDDEN     false   /* Feature is hidden from the user */
 
+#define FTR_VISIBLE_IF_IS_ENABLED(config)              \
+       (IS_ENABLED(config) ? FTR_VISIBLE : FTR_HIDDEN)
+
 struct arm64_ftr_bits {
        bool            sign;   /* Value is signed ? */
        bool            visible;
index 235e77d982610a0114f62cc833278994cc2e76b2..cbf08d7cbf3089949bb4ada755a36383e6e5db2e 100644 (file)
@@ -91,6 +91,7 @@
 #define BRCM_CPU_PART_VULCAN           0x516
 
 #define QCOM_CPU_PART_FALKOR_V1                0x800
+#define QCOM_CPU_PART_FALKOR           0xC00
 
 #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
 #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
+#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
 
 #ifndef __ASSEMBLY__
 
index 149d05fb9421520bd659b62627941ed36ce46bb3..bdcc7f1c9d069df3d95c6884b8071cdba530cfc8 100644 (file)
@@ -42,6 +42,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/fixmap.h>
 #include <linux/mmdebug.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
 extern void __pmd_error(const char *file, int line, unsigned long val);
@@ -149,12 +151,20 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-       return clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+       return pte;
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-       return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+       pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+       if (pte_write(pte))
+               pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+       return pte;
 }
 
 static inline pte_t pte_mkold(pte_t pte)
@@ -207,9 +217,6 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
        }
 }
 
-struct mm_struct;
-struct vm_area_struct;
-
 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 
 /*
@@ -238,7 +245,8 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         * hardware updates of the pte (ptep_set_access_flags safely changes
         * valid ptes without going through an invalid entry).
         */
-       if (pte_valid(*ptep) && pte_valid(pte)) {
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+          (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
                VM_WARN_ONCE(!pte_young(pte),
                             "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
                             __func__, pte_val(*ptep), pte_val(pte));
@@ -641,28 +649,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /*
- * ptep_set_wrprotect - mark read-only while preserving the hardware update of
- * the Access Flag.
+ * ptep_set_wrprotect - mark read-only while trasferring potential hardware
+ * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit.
  */
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
 {
        pte_t old_pte, pte;
 
-       /*
-        * ptep_set_wrprotect() is only called on CoW mappings which are
-        * private (!VM_SHARED) with the pte either read-only (!PTE_WRITE &&
-        * PTE_RDONLY) or writable and software-dirty (PTE_WRITE &&
-        * !PTE_RDONLY && PTE_DIRTY); see is_cow_mapping() and
-        * protection_map[]. There is no race with the hardware update of the
-        * dirty state: clearing of PTE_RDONLY when PTE_WRITE (a.k.a. PTE_DBM)
-        * is set.
-        */
-       VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(*ptep),
-                    "%s: potential race with hardware DBM", __func__);
        pte = READ_ONCE(*ptep);
        do {
                old_pte = pte;
+               /*
+                * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY
+                * clear), set the PTE_DIRTY bit.
+                */
+               if (pte_hw_dirty(pte))
+                       pte = pte_mkdirty(pte);
                pte = pte_wrprotect(pte);
                pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep),
                                               pte_val(old_pte), pte_val(pte));
index 65f42d2574142d4b37bebf49ed1fc3cdccbb56ae..2a752cb2a0f35a82f2a60e744d160af9b5f6c6a1 100644 (file)
@@ -37,6 +37,7 @@ ENTRY(__cpu_soft_restart)
        mrs     x12, sctlr_el1
        ldr     x13, =SCTLR_ELx_FLAGS
        bic     x12, x12, x13
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x12
        isb
 
index c5ba0097887f93e9d30b37355b84e5750d74d04e..a73a5928f09b26ae7b2de7b3c2217e5c975de4a0 100644 (file)
@@ -145,7 +145,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
-       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+                                  FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
        S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
        S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
index 4e6ad355bd058e6a4ab73a0f94832a7b1fe719a6..6b9736c3fb5630ab31c17b662b5c5cfe2b7d0832 100644 (file)
@@ -96,6 +96,7 @@ ENTRY(entry)
        mrs     x0, sctlr_el2
        bic     x0, x0, #1 << 0 // clear SCTLR.M
        bic     x0, x0, #1 << 2 // clear SCTLR.C
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x0
        isb
        b       2f
@@ -103,6 +104,7 @@ ENTRY(entry)
        mrs     x0, sctlr_el1
        bic     x0, x0, #1 << 0 // clear SCTLR.M
        bic     x0, x0, #1 << 2 // clear SCTLR.C
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x0
        isb
 2:
index 540a1e010eb519a6223e091d5f9f00b716fc2c7b..fae81f7964b4f226242961607cb087a20710e22b 100644 (file)
@@ -1043,7 +1043,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
        local_bh_disable();
 
-       current->thread.fpsimd_state = *state;
+       current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
        if (system_supports_sve() && test_thread_flag(TIF_SVE))
                fpsimd_to_sve(current);
 
index 67e86a0f57ac43edcee10d89bd5db2e050ae1621..e3cb9fbf96b66c3ba2d4327d4c1a4b3ca734ef1f 100644 (file)
@@ -750,6 +750,7 @@ __primary_switch:
         * to take into account by discarding the current kernel mapping and
         * creating a new one.
         */
+       pre_disable_mmu_workaround
        msr     sctlr_el1, x20                  // disable the MMU
        isb
        bl      __create_page_tables            // recreate kernel mapping
index 749f81779420c7bab2ead0d8f5b9a6cf108e6a45..74bb56f656eff024839df19897ba06512128e9bb 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/perf_event.h>
 #include <linux/ptrace.h>
 #include <linux/smp.h>
+#include <linux/uaccess.h>
 
 #include <asm/compat.h>
 #include <asm/current.h>
@@ -36,7 +37,6 @@
 #include <asm/traps.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
-#include <asm/uaccess.h>
 
 /* Breakpoint currently in use for each BRP. */
 static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]);
index ce704a4aeadd438bf637472bb7037b89fba15087..f407e422a7200b86072349cc70e1e6d5e7e1753b 100644 (file)
@@ -45,6 +45,7 @@ ENTRY(arm64_relocate_new_kernel)
        mrs     x0, sctlr_el2
        ldr     x1, =SCTLR_ELx_FLAGS
        bic     x0, x0, x1
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x0
        isb
 1:
index 3f9615582377661a88fab8be6a12365d625d830a..870828c364c508f825eacc1c49c17886dc9c8cb2 100644 (file)
@@ -151,6 +151,7 @@ reset:
        mrs     x5, sctlr_el2
        ldr     x6, =SCTLR_ELx_FLAGS
        bic     x5, x5, x6              // Clear SCTL_M and etc
+       pre_disable_mmu_workaround
        msr     sctlr_el2, x5
        isb
 
index 321c9c05dd9e09fc0c745a4543a286b7628f00a4..f4363d40e2cd7fd62d40d826d5296c95f15cde9f 100644 (file)
@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
 {
        u64 reg;
 
+       /* Clear pmscr in case of early return */
+       *pmscr_el1 = 0;
+
        /* SPE present on this CPU? */
        if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
                                                  ID_AA64DFR0_PMSVER_SHIFT))
index ca74a2aace425b95ed95ecf0e70a78188621004e..7b60d62ac5939e83c8e153ec1c3a0447565f23eb 100644 (file)
@@ -389,7 +389,7 @@ void ptdump_check_wx(void)
                .check_wx = true,
        };
 
-       walk_pgd(&st, &init_mm, 0);
+       walk_pgd(&st, &init_mm, VA_START);
        note_page(&st, 0, 0, 0);
        if (st.wx_pages || st.uxn_pages)
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
index 22168cd0dde73e06698bc40b166867df17a00134..9b7f89df49dbfe108da2eadc59421ce99a4432b4 100644 (file)
@@ -574,7 +574,6 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
        struct siginfo info;
        const struct fault_info *inf;
-       int ret = 0;
 
        inf = esr_to_fault_info(esr);
        pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
@@ -589,7 +588,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                if (interrupts_enabled(regs))
                        nmi_enter();
 
-               ret = ghes_notify_sea();
+               ghes_notify_sea();
 
                if (interrupts_enabled(regs))
                        nmi_exit();
@@ -604,7 +603,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
                info.si_addr  = (void __user *)addr;
        arm64_notify_die("", regs, &info, esr);
 
-       return ret;
+       return 0;
 }
 
 static const struct fault_info fault_info[] = {
index 5960bef0170df85916d0c1ac3b65f570f0af67ea..00e7b900ca4193e83dfa7de7dd506984afe90bce 100644 (file)
@@ -476,6 +476,8 @@ void __init arm64_memblock_init(void)
 
        reserve_elfcorehdr();
 
+       high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+
        dma_contiguous_reserve(arm64_dma_phys_limit);
 
        memblock_allow_resize();
@@ -502,7 +504,6 @@ void __init bootmem_init(void)
        sparse_init();
        zone_sizes_init(min, max);
 
-       high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
        memblock_dump_all();
 }
 
index 9345b44b86f036572e33721eb80e9bbbe4493aa4..f57118e1f6b4265257799ae2cf8ea356077e20b9 100644 (file)
@@ -123,8 +123,8 @@ int puts(const char *s)
        while ((nuline = strchr(s, '\n')) != NULL) {
                if (nuline != s)
                        pdc_iodc_print(s, nuline - s);
-                       pdc_iodc_print("\r\n", 2);
-                       s = nuline + 1;
+               pdc_iodc_print("\r\n", 2);
+               s = nuline + 1;
        }
        if (*s != '\0')
                pdc_iodc_print(s, strlen(s));
index c980a02a52bc0dda0a23b205f59d1d86438553f2..598c8d60fa5e602cc9303e1986ada9680d64feb3 100644 (file)
@@ -35,7 +35,12 @@ struct thread_info {
 
 /* thread information allocation */
 
+#ifdef CONFIG_IRQSTACKS
+#define THREAD_SIZE_ORDER      2 /* PA-RISC requires at least 16k stack */
+#else
 #define THREAD_SIZE_ORDER      3 /* PA-RISC requires at least 32k stack */
+#endif
+
 /* Be sure to hunt all references to this down when you change the size of
  * the kernel stack */
 #define THREAD_SIZE             (PAGE_SIZE << THREAD_SIZE_ORDER)
index a4fd296c958e8e14f13a913aca50510b11eb49b7..f3cecf5117cf8ab14724f0ea3535220c3224d569 100644 (file)
@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
        STREG   %r19,PT_SR7(%r16)
 
 intr_return:
-       /* NOTE: Need to enable interrupts incase we schedule. */
-       ssm     PSW_SM_I, %r0
-
        /* check for reschedule */
        mfctl   %cr30,%r1
        LDREG   TI_FLAGS(%r1),%r19      /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
        LDREG   PT_IASQ1(%r16), %r20
        cmpib,COND(=),n 0,%r20,intr_restore /* backward */
 
+       /* NOTE: We need to enable interrupts if we have to deliver
+        * signals. We used to do this earlier but it caused kernel
+        * stack overflows. */
+       ssm     PSW_SM_I, %r0
+
        copy    %r0, %r25                       /* long in_syscall = 0 */
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29                  /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
        cmpib,COND(=)   0, %r20, intr_do_preempt
        nop
 
+       /* NOTE: We need to enable interrupts if we schedule.  We used
+        * to do this earlier but it caused kernel stack overflows. */
+       ssm     PSW_SM_I, %r0
+
 #ifdef CONFIG_64BIT
        ldo     -16(%r30),%r29          /* Reference param save area */
 #endif
index e3a8e5e4d5de75897adcea4134f87c7246f60646..8d072c44f300c16d45ba8f4ee0c2eee6435e4ddd 100644 (file)
@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)
 
 
        __INITRODATA
+       .align 4
        .export os_hpmc_size
 os_hpmc_size:
        .word .os_hpmc_end-.os_hpmc
index 5a657986ebbf4bef7beff4e8c8d20f1343872347..143f90e2f9f3c631616d4af52f0fe3fa08f44af9 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/slab.h>
 #include <linux/kallsyms.h>
 #include <linux/sort.h>
-#include <linux/sched.h>
 
 #include <linux/uaccess.h>
 #include <asm/assembly.h>
index 7eab4bb8abe630b14c54c3b457285b4228607dc6..66e506520505d8a3245d49d492831df5e3bbb42a 100644 (file)
@@ -16,9 +16,7 @@
 #include <linux/preempt.h>
 #include <linux/init.h>
 
-#include <asm/processor.h>
 #include <asm/delay.h>
-
 #include <asm/special_insns.h>    /* for mfctl() */
 #include <asm/processor.h> /* for boot_cpu_data */
 
index 6177d43f0ce8afa9c1f6a1101e92ba161e47d97a..e2a2b8400490049143edee40316313a906ca6db7 100644 (file)
@@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
index 5acb5a176dbe5c8bffe6ddb7458b7d3ac2b7019f..72be0c32e902a35fa45e5ed02036df91999dda58 100644 (file)
@@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)
 
        printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
-       printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+       printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
               regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR:  "REG" ", regs->msr);
        print_msr_bits(regs->msr);
index bf457843e03217b9aa02815d7791f0fce72aea2b..0d750d274c4e21a3324eb3505bbd73c86a58cdc9 100644 (file)
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
 
        /* Return the per-cpu state for state saving/migration */
        return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
-              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+              (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+              (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
 }
 
 int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 
        /*
         * Restore P and Q. If the interrupt was pending, we
-        * force both P and Q, which will trigger a resend.
+        * force Q and !P, which will trigger a resend.
         *
         * That means that a guest that had both an interrupt
         * pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
         * is perfectly fine as coalescing interrupts that haven't
         * been presented yet is always allowed.
         */
-       if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+       if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
                state->old_p = true;
        if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
                state->old_q = true;
index 46d74e81aff1b4caad4769e7686fa0a800695cd4..d183b4801bdbded832b90d2aa1a18e713f70695b 100644 (file)
@@ -763,7 +763,8 @@ emit_clear:
                        func = (u8 *) __bpf_call_base + imm;
 
                        /* Save skb pointer if we need to re-cache skb data */
-                       if (bpf_helper_changes_pkt_data(func))
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func))
                                PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
                        bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ emit_clear:
                        PPC_MR(b2p[BPF_REG_0], 3);
 
                        /* refresh skb cache */
-                       if (bpf_helper_changes_pkt_data(func)) {
+                       if ((ctx->seen & SEEN_SKB) &&
+                           bpf_helper_changes_pkt_data(func)) {
                                /* reload skb pointer to r3 */
                                PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
                                bpf_jit_emit_skb_loads(image, ctx);
index 1538129663658381b6b1a425dcbf582b1ed09531..fce545774d50afc6093c28ad2f4127c24ed5331c 100644 (file)
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
        int ret;
        __u64 target;
 
-       if (is_kernel_addr(addr))
-               return branch_target((unsigned int *)addr);
+       if (is_kernel_addr(addr)) {
+               if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+                       return 0;
+
+               return branch_target(&instr);
+       }
 
        /* Userspace: need copy instruction here then translate it */
        pagefault_disable();
index 0ead3cd73caa2f8816e8c04f47cca691efba0560..be4e7f84f70a59db60e92a9bfe845678f71cc608 100644 (file)
@@ -309,6 +309,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
        if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
                return 0;
 
+       /*
+        * Check whether nest_imc is registered. We could end up here if the
+        * cpuhotplug callback registration fails. i.e, callback invokes the
+        * offline path for all successfully registered nodes. At this stage,
+        * nest_imc pmu will not be registered and we should return here.
+        *
+        * We return with a zero since this is not an offline failure. And
+        * cpuhp_setup_state() returns the actual failure reason to the caller,
+        * which in turn will call the cleanup routine.
+        */
+       if (!nest_pmus)
+               return 0;
+
        /*
         * Now that this cpu is one of the designated,
         * find a next cpu a) which is online and b) in same chip.
@@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                if (nest_pmus == 1) {
                        cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
                        kfree(nest_imc_refc);
+                       kfree(per_nest_pmu_arr);
                }
 
                if (nest_pmus > 0)
@@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
                kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
        kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
        kfree(pmu_ptr);
-       kfree(per_nest_pmu_arr);
        return;
 }
 
@@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
                        ret = nest_pmu_cpumask_init();
                        if (ret) {
                                mutex_unlock(&nest_init_lock);
+                               kfree(nest_imc_refc);
+                               kfree(per_nest_pmu_arr);
                                goto err_free;
                        }
                }
index 773c4e039cd7288bcd25ed53ce831db84c766f26..c0319cbf1eec58d7ea8960259838b865c23f49a1 100644 (file)
 #define smp_rmb()      RISCV_FENCE(r,r)
 #define smp_wmb()      RISCV_FENCE(w,w)
 
+/*
+ * This is a very specific barrier: it's currently only used in two places in
+ * the kernel, both in the scheduler.  See include/linux/spinlock.h for the two
+ * orderings it guarantees, but the "critical section is RCsc" guarantee
+ * mandates a barrier on RISC-V.  The sequence looks like:
+ *
+ *    lr.aq lock
+ *    sc    lock <= LOCKED
+ *    smp_mb__after_spinlock()
+ *    // critical section
+ *    lr    lock
+ *    sc.rl lock <= UNLOCKED
+ *
+ * The AQ/RL pair provides a RCpc critical section, but there's not really any
+ * way we can take advantage of that here because the ordering is only enforced
+ * on that one lock.  Thus, we're just doing a full fence.
+ */
+#define smp_mb__after_spinlock()       RISCV_FENCE(rw,rw)
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASSEMBLY__ */
index 8fbb6749910d42473d814b37eb255f812d4d206b..cb7b0c63014ecbc61c8d9a2b8263a65dd1775d11 100644 (file)
 #include <asm/tlbflush.h>
 #include <asm/thread_info.h>
 
-#ifdef CONFIG_HVC_RISCV_SBI
-#include <asm/hvc_riscv_sbi.h>
-#endif
-
 #ifdef CONFIG_DUMMY_CONSOLE
 struct screen_info screen_info = {
        .orig_video_lines       = 30,
@@ -212,13 +208,6 @@ static void __init setup_bootmem(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-#if defined(CONFIG_HVC_RISCV_SBI)
-       if (likely(early_console == NULL)) {
-               early_console = &riscv_sbi_early_console_dev;
-               register_console(early_console);
-       }
-#endif
-
 #ifdef CONFIG_CMDLINE_BOOL
 #ifdef CONFIG_CMDLINE_OVERRIDE
        strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
index a2ae936a093e4f91d4def96fff0dd46e6e0226c0..79c78668258ede202086c072c63352ca14585903 100644 (file)
@@ -70,7 +70,7 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
        bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
 
        /* Check the reserved flags. */
-       if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
+       if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL))
                return -EINVAL;
 
        flush_icache_mm(mm, local);
index 57d7bc92e0b8a766d24520ea5234fca56971b646..0a6b0286c32e9e0a7283d9cfb66dc357fe2e36fa 100644 (file)
@@ -1264,12 +1264,6 @@ static inline pud_t pud_mkwrite(pud_t pud)
        return pud;
 }
 
-#define pud_write pud_write
-static inline int pud_write(pud_t pud)
-{
-       return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
-}
-
 static inline pud_t pud_mkclean(pud_t pud)
 {
        if (pud_large(pud)) {
index f04db3779b34507f9dd38791fc89131505d5f0c3..59eea9c65d3e9e8595d509001b1c794420060887 100644 (file)
@@ -263,6 +263,7 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis
                return retval;
        }
 
+       groups_sort(group_info);
        retval = set_current_groups(group_info);
        put_group_info(group_info);
 
index e81c16838b90f1bc9a5418bc1b4e5365e9cb0aef..9557d8b516df5a689dda995cd7fd501ddd6cf54c 100644 (file)
@@ -55,8 +55,7 @@ struct bpf_jit {
 #define SEEN_LITERAL   8       /* code uses literals */
 #define SEEN_FUNC      16      /* calls C functions */
 #define SEEN_TAIL_CALL 32      /* code uses tail calls */
-#define SEEN_SKB_CHANGE        64      /* code changes skb data */
-#define SEEN_REG_AX    128     /* code uses constant blinding */
+#define SEEN_REG_AX    64      /* code uses constant blinding */
 #define SEEN_STACK     (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
 
 /*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
                        EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
                                      REG_15, 152);
        }
-       if (jit->seen & SEEN_SKB)
+       if (jit->seen & SEEN_SKB) {
                emit_load_skb_data_hlen(jit);
-       if (jit->seen & SEEN_SKB_CHANGE)
                /* stg %b1,ST_OFF_SKBP(%r0,%r15) */
                EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
                              STK_OFF_SKBP);
+       }
 }
 
 /*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
                EMIT2(0x0d00, REG_14, REG_W1);
                /* lgr %b0,%r2: load return value into %b0 */
                EMIT4(0xb9040000, BPF_REG_0, REG_2);
-               if (bpf_helper_changes_pkt_data((void *)func)) {
-                       jit->seen |= SEEN_SKB_CHANGE;
+               if ((jit->seen & SEEN_SKB) &&
+                   bpf_helper_changes_pkt_data((void *)func)) {
                        /* lg %b1,ST_OFF_SKBP(%r15) */
                        EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
                                      REG_15, STK_OFF_SKBP);
index e5547b22cd1832c3aea507b3dfc694da90568222..0ddbbb03182232fe199f5222248617c72a2f23b7 100644 (file)
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
        .previous
 
 ENTRY(__arch_hweight64)
-       sethi   %hi(__sw_hweight16), %g1
-       jmpl    %g1 + %lo(__sw_hweight16), %g0
+       sethi   %hi(__sw_hweight64), %g1
+       jmpl    %g1 + %lo(__sw_hweight64), %g0
         nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
index be3136f142a9993e0c6c8cfa1d651b1685654a73..a8103a84b4ac4a2ec84c44c302862b3aed8b7e7f 100644 (file)
@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
index 815c03d7a765524424b92866b1567ea2a43695d4..41363f46797bf9f74dd922fadbd2a3f190e8c9bb 100644 (file)
@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
               task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
               tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
index 33c0f8bb0f33de0c6beadd3dd8a9bef6253bcb10..5335ba3c850ed3acdc074ffe639d3ddac101f2ad 100644 (file)
@@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
        if (!(pmd_val(pmd) & _PAGE_VALID))
                return 0;
 
-       if (!pmd_access_permitted(pmd, write))
+       if (write && !pmd_write(pmd))
                return 0;
 
        refs = 0;
@@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
        if (!(pud_val(pud) & _PAGE_VALID))
                return 0;
 
-       if (!pud_access_permitted(pud, write))
+       if (write && !pud_write(pud))
                return 0;
 
        refs = 0;
index 5765e7e711f78248d2bff70f9c57ca48a4514355..ff5f9cb3039af1f91c8701915f08c051c21d0d81 100644 (file)
@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
                u8 *func = ((u8 *)__bpf_call_base) + imm;
 
                ctx->saw_call = true;
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
 
                emit_call((u32 *)func, ctx);
                emit_nop(ctx);
 
                emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
 
-               if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
-                       load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
+               if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
+                       load_skb_regs(ctx, L7);
                break;
        }
 
index 50a32c33d729ba2a570eadaf77cff69925218c42..73c57f614c9e0600a5b4df7b28a7fa55f4abe471 100644 (file)
@@ -1,4 +1,5 @@
 generic-y += barrier.h
+generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += clkdev.h
 generic-y += current.h
index b668e351fd6c2e4f7a4b75c8a67eada77449abc9..fca34b2177e28a055663055d01c4fb7d78420285 100644 (file)
@@ -15,9 +15,10 @@ extern void uml_setup_stubs(struct mm_struct *mm);
 /*
  * Needed since we do not use the asm-generic/mm_hooks.h:
  */
-static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
        uml_setup_stubs(mm);
+       return 0;
 }
 extern void arch_exit_mmap(struct mm_struct *mm);
 static inline void arch_unmap(struct mm_struct *mm,
index 4e6fcb32620ffb2125f648622499e5bf7c950e72..428644175956231aad112a0ce221452913736635 100644 (file)
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
        if (!printk_ratelimit())
                return;
 
-       printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
+       printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
                task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
                tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
                (void *)UPT_IP(regs), (void *)UPT_SP(regs),
index 59b06b48f27d7a4e0d8b82fc147d3fdad7f75295..5c205a9cb5a6a4bb2c865255bc946d7ca4882db1 100644 (file)
@@ -81,9 +81,10 @@ do { \
        } \
 } while (0)
 
-static inline void arch_dup_mmap(struct mm_struct *oldmm,
-                                struct mm_struct *mm)
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+                               struct mm_struct *mm)
 {
+       return 0;
 }
 
 static inline void arch_unmap(struct mm_struct *mm,
index 8eed3f94bfc774de5e3f344590f8889a999dea9c..d4fc98c50378c40bc901f6446d2bfff68151eb6a 100644 (file)
@@ -926,7 +926,8 @@ config MAXSMP
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
        range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK
+       range 2 64 if SMP && X86_32 && X86_BIGSMP
+       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
        range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
        default "1" if !SMP
        default "8192" if MAXSMP
index 6293a8768a9123038eeced9e8dc2c4874feed275..672441c008c73ae3d949b974b128ef167d7d486a 100644 (file)
@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
 config UNWINDER_GUESS
        bool "Guess unwinder"
        depends on EXPERT
+       depends on !STACKDEPOT
        ---help---
          This option enables the "guess" unwinder for unwinding kernel stack
          traces.  It scans the stack and reports every kernel text address it
index 1e9c322e973af0e1024dc3751b99d16eb76574f7..f25e1530e0644c83d8c69b1a90436c54ce823ae4 100644 (file)
@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
 ifdef CONFIG_X86_64
        vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
        vmlinux-objs-y += $(obj)/mem_encrypt.o
+       vmlinux-objs-y += $(obj)/pgtable_64.o
 endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
index 20919b4f31330fbc36528e47cf7dd010e516c31c..fc313e29fe2c4be637ff7d41bfafdc7a29144ba4 100644 (file)
@@ -305,10 +305,18 @@ ENTRY(startup_64)
        leaq    boot_stack_end(%rbx), %rsp
 
 #ifdef CONFIG_X86_5LEVEL
-       /* Check if 5-level paging has already enabled */
-       movq    %cr4, %rax
-       testl   $X86_CR4_LA57, %eax
-       jnz     lvl5
+       /*
+        * Check if we need to enable 5-level paging.
+        * RSI holds real mode data and need to be preserved across
+        * a function call.
+        */
+       pushq   %rsi
+       call    l5_paging_required
+       popq    %rsi
+
+       /* If l5_paging_required() returned zero, we're done here. */
+       cmpq    $0, %rax
+       je      lvl5
 
        /*
         * At this point we are in long mode with 4-level paging enabled,
index b50c42455e25257bff89dd2d9d5f23534340076e..98761a1576ceb5c21b2d8c7e98c1217fd48abb26 100644 (file)
@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
        }
 }
 
+static bool l5_supported(void)
+{
+       /* Check if leaf 7 is supported. */
+       if (native_cpuid_eax(0) < 7)
+               return 0;
+
+       /* Check if la57 is supported. */
+       return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
+}
+
 #if CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
                               unsigned long virt_addr)
@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
        console_init();
        debug_putstr("early console in extract_kernel\n");
 
+       if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
+               error("This linux kernel as configured requires 5-level paging\n"
+                       "This CPU does not support the required 'cr4.la57' feature\n"
+                       "Unable to boot - please use a kernel appropriate for your CPU\n");
+       }
+
        free_mem_ptr     = heap;        /* Heap */
        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
index d5364ca2e3f9290d0ba36606b7e25369f872c144..b5e5e02f8cde7fa9123dc56981c3a3a98f45843c 100644 (file)
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
new file mode 100644 (file)
index 0000000..b4469a3
--- /dev/null
@@ -0,0 +1,28 @@
+#include <asm/processor.h>
+
+/*
+ * __force_order is used by special_insns.h asm code to force instruction
+ * serialization.
+ *
+ * It is not referenced from the code, but GCC < 5 with -fPIE would fail
+ * due to an undefined symbol. Define it to make these ancient GCCs work.
+ */
+unsigned long __force_order;
+
+int l5_paging_required(void)
+{
+       /* Check if leaf 7 is supported. */
+
+       if (native_cpuid_eax(0) < 7)
+               return 0;
+
+       /* Check if la57 is supported. */
+       if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+               return 0;
+
+       /* Check if 5-level paging has already been enabled. */
+       if (native_read_cr4() & X86_CR4_LA57)
+               return 0;
+
+       return 1;
+}
index 49f4970f693b3bddacad0b2965acad0ae112cb03..c9e8499fbfe75c0a98d0223247fa6cb0746198bb 100644 (file)
@@ -44,9 +44,9 @@ FDINITRD=$6
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
-verify "$MTOOLSRC"
 
 genbzdisk() {
+       verify "$MTOOLSRC"
        mformat a:
        syslinux $FIMAGE
        echo "$KCMDLINE" | mcopy - a:syslinux.cfg
@@ -57,6 +57,7 @@ genbzdisk() {
 }
 
 genfdimage144() {
+       verify "$MTOOLSRC"
        dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
        mformat v:
        syslinux $FIMAGE
@@ -68,6 +69,7 @@ genfdimage144() {
 }
 
 genfdimage288() {
+       verify "$MTOOLSRC"
        dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
        mformat w:
        syslinux $FIMAGE
index 399a29d067d6367603714633fb8c4de6ab77275a..cb91a64a99e7cdbc0422227383611378fb6b076a 100644 (file)
@@ -59,13 +59,6 @@ static int encrypt(struct blkcipher_desc *desc,
 
        salsa20_ivsetup(ctx, walk.iv);
 
-       if (likely(walk.nbytes == nbytes))
-       {
-               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-                                     walk.dst.virt.addr, nbytes);
-               return blkcipher_walk_done(desc, &walk, 0);
-       }
-
        while (walk.nbytes >= 64) {
                salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
                                      walk.dst.virt.addr,
index 3fd8bc560faece4cb6253e87a4c092965a73e7af..45a63e00a6af9a12b4739246d6844ba94f766e71 100644 (file)
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
 
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK       (1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK                (PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT reg:req
+       bts     $X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+       ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+       /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+       andq    $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+       ADJUST_KERNEL_CR3 \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+       PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+       mov     %cr3, \scratch_reg
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * Test if the ASID needs a flush.
+        */
+       movq    \scratch_reg, \scratch_reg2
+       andq    $(0x7FF), \scratch_reg          /* mask ASID */
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       /* Flush needed, clear the bit */
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       movq    \scratch_reg2, \scratch_reg
+       jmp     .Lwrcr3_\@
+
+.Lnoflush_\@:
+       movq    \scratch_reg2, \scratch_reg
+       SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+       /* Flip the PGD and ASID to the user version */
+       orq     $(PTI_SWITCH_MASK), \scratch_reg
+       mov     \scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK        scratch_reg:req
+       pushq   %rax
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+       popq    %rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+       movq    %cr3, \scratch_reg
+       movq    \scratch_reg, \save_reg
+       /*
+        * Is the "switch mask" all zero?  That means that both of
+        * these are zero:
+        *
+        *      1. The user/kernel PCID bit, and
+        *      2. The user/kernel "bit" that points CR3 to the
+        *         bottom half of the 8k PGD
+        *
+        * That indicates a kernel CR3 value, not a user CR3.
+        */
+       testq   $(PTI_SWITCH_MASK), \scratch_reg
+       jz      .Ldone_\@
+
+       ADJUST_KERNEL_CR3 \scratch_reg
+       movq    \scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+       ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+       ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+       /*
+        * KERNEL pages can always resume with NOFLUSH as we do
+        * explicit flushes.
+        */
+       bt      $X86_CR3_PTI_SWITCH_BIT, \save_reg
+       jnc     .Lnoflush_\@
+
+       /*
+        * Check if there's a pending flush for the user ASID we're
+        * about to set.
+        */
+       movq    \save_reg, \scratch_reg
+       andq    $(0x7FF), \scratch_reg
+       bt      \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jnc     .Lnoflush_\@
+
+       btr     \scratch_reg, THIS_CPU_user_pcid_flush_mask
+       jmp     .Lwrcr3_\@
+
+.Lnoflush_\@:
+       SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+       /*
+        * The CR3 write could be avoided when not changing its value,
+        * but would require a CR3 read *and* a scratch register.
+        */
+       movq    \save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
index 4838037f97f6edffda62b5b045c837fcc29402f0..ace8f321a5a1f2d1331cc4331a1922c9ed3d8bc1 100644 (file)
@@ -941,9 +941,10 @@ ENTRY(debug)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Ldebug_from_sysenter_stack
 
        TRACE_IRQS_OFF
@@ -984,9 +985,10 @@ ENTRY(nmi)
        movl    %esp, %eax                      # pt_regs pointer
 
        /* Are we currently on the SYSENTER stack? */
-       PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
-       subl    %eax, %ecx      /* ecx = (end of SYSENTER_stack) - esp */
-       cmpl    $SIZEOF_SYSENTER_stack, %ecx
+       movl    PER_CPU_VAR(cpu_entry_area), %ecx
+       addl    $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
+       subl    %eax, %ecx      /* ecx = (end of entry_stack) - esp */
+       cmpl    $SIZEOF_entry_stack, %ecx
        jb      .Lnmi_from_sysenter_stack
 
        /* Not on SYSENTER stack. */
index f81d50d7ceacdefa06d61482687937096c68421c..f048e384ff54e06530b657efc3b00cdf50f1ce5b 100644 (file)
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -40,6 +39,8 @@
 #include <asm/frame.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -140,6 +141,67 @@ END(native_usergs_sysret64)
  * with them due to bugs in both AMD and Intel CPUs.
  */
 
+       .pushsection .entry_trampoline, "ax"
+
+/*
+ * The code in here gets remapped into cpu_entry_area's trampoline.  This means
+ * that the assembler and linker have the wrong idea as to where this code
+ * lives (and, in fact, it's mapped more than once, so it's not even at a
+ * fixed address).  So we can't reference any symbols outside the entry
+ * trampoline and expect it to work.
+ *
+ * Instead, we carefully abuse %rip-relative addressing.
+ * _entry_trampoline(%rip) refers to the start of the remapped) entry
+ * trampoline.  We can thus find cpu_entry_area with this macro:
+ */
+
+#define CPU_ENTRY_AREA \
+       _entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
+
+/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
+#define RSP_SCRATCH    CPU_ENTRY_AREA_entry_stack + \
+                       SIZEOF_entry_stack - 8 + CPU_ENTRY_AREA
+
+ENTRY(entry_SYSCALL_64_trampoline)
+       UNWIND_HINT_EMPTY
+       swapgs
+
+       /* Stash the user RSP. */
+       movq    %rsp, RSP_SCRATCH
+
+       /* Note: using %rsp as a scratch reg. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+       /* Load the top of the task stack into RSP */
+       movq    CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+
+       /* Start building the simulated IRET frame. */
+       pushq   $__USER_DS                      /* pt_regs->ss */
+       pushq   RSP_SCRATCH                     /* pt_regs->sp */
+       pushq   %r11                            /* pt_regs->flags */
+       pushq   $__USER_CS                      /* pt_regs->cs */
+       pushq   %rcx                            /* pt_regs->ip */
+
+       /*
+        * x86 lacks a near absolute jump, and we can't jump to the real
+        * entry text with a relative jump.  We could push the target
+        * address and then use retq, but this destroys the pipeline on
+        * many CPUs (wasting over 20 cycles on Sandy Bridge).  Instead,
+        * spill RDI and restore it in a second-stage trampoline.
+        */
+       pushq   %rdi
+       movq    $entry_SYSCALL_64_stage2, %rdi
+       jmp     *%rdi
+END(entry_SYSCALL_64_trampoline)
+
+       .popsection
+
+ENTRY(entry_SYSCALL_64_stage2)
+       UNWIND_HINT_EMPTY
+       popq    %rdi
+       jmp     entry_SYSCALL_64_after_hwframe
+END(entry_SYSCALL_64_stage2)
+
 ENTRY(entry_SYSCALL_64)
        UNWIND_HINT_EMPTY
        /*
@@ -149,6 +211,10 @@ ENTRY(entry_SYSCALL_64)
         */
 
        swapgs
+       /*
+        * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+        * is not required to switch CR3.
+        */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -330,8 +396,25 @@ syscall_return_via_sysret:
        popq    %rsi    /* skip rcx */
        popq    %rdx
        popq    %rsi
+
+       /*
+        * Now all regs are restored except RSP and RDI.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       pushq   RSP-RDI(%rdi)   /* RSP */
+       pushq   (%rdi)          /* RDI */
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
        popq    %rdi
-       movq    RSP-ORIG_RAX(%rsp), %rsp
+       popq    %rsp
        USERGS_SYSRET64
 END(entry_SYSCALL_64)
 
@@ -466,12 +549,13 @@ END(irq_entries_start)
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
 #ifdef CONFIG_DEBUG_ENTRY
-       pushfq
-       testl $X86_EFLAGS_IF, (%rsp)
+       pushq %rax
+       SAVE_FLAGS(CLBR_RAX)
+       testl $X86_EFLAGS_IF, %eax
        jz .Lokay_\@
        ud2
 .Lokay_\@:
-       addq $8, %rsp
+       popq %rax
 #endif
 .endm
 
@@ -563,6 +647,13 @@ END(irq_entries_start)
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
        cld
+
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jz      1f
+       SWAPGS
+       call    switch_to_thread_stack
+1:
+
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS
        SAVE_EXTRA_REGS
@@ -572,12 +663,8 @@ END(irq_entries_start)
        jz      1f
 
        /*
-        * IRQ from user mode.  Switch to kernel gsbase and inform context
-        * tracking that we're in kernel mode.
-        */
-       SWAPGS
-
-       /*
+        * IRQ from user mode.
+        *
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
         * (which can take locks).  Since TRACE_IRQS_OFF idempotent,
@@ -630,10 +717,43 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       SWAPGS
        POP_EXTRA_REGS
-       POP_C_REGS
-       addq    $8, %rsp        /* skip regs->orig_ax */
+       popq    %r11
+       popq    %r10
+       popq    %r9
+       popq    %r8
+       popq    %rax
+       popq    %rcx
+       popq    %rdx
+       popq    %rsi
+
+       /*
+        * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
+        * Save old stack pointer and switch to trampoline stack.
+        */
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
+
+       /* Copy the IRET frame to the trampoline stack. */
+       pushq   6*8(%rdi)       /* SS */
+       pushq   5*8(%rdi)       /* RSP */
+       pushq   4*8(%rdi)       /* EFLAGS */
+       pushq   3*8(%rdi)       /* CS */
+       pushq   2*8(%rdi)       /* RIP */
+
+       /* Push user RDI on the trampoline stack. */
+       pushq   (%rdi)
+
+       /*
+        * We are on the trampoline stack.  All regs except RDI are live.
+        * We can do future final exit work right here.
+        */
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
+       /* Restore RDI. */
+       popq    %rdi
+       SWAPGS
        INTERRUPT_RETURN
 
 
@@ -713,7 +833,9 @@ native_irq_return_ldt:
         */
 
        pushq   %rdi                            /* Stash user RDI */
-       SWAPGS
+       SWAPGS                                  /* to kernel GS */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi   /* to kernel CR3 */
+
        movq    PER_CPU_VAR(espfix_waddr), %rdi
        movq    %rax, (0*8)(%rdi)               /* user RAX */
        movq    (1*8)(%rsp), %rax               /* user RIP */
@@ -729,7 +851,6 @@ native_irq_return_ldt:
        /* Now RAX == RSP. */
 
        andl    $0xffff0000, %eax               /* RAX = (RSP & 0xffff0000) */
-       popq    %rdi                            /* Restore user RDI */
 
        /*
         * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -740,7 +861,11 @@ native_irq_return_ldt:
         * still points to an RO alias of the ESPFIX stack.
         */
        orq     PER_CPU_VAR(espfix_stack), %rax
-       SWAPGS
+
+       SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+       SWAPGS                                  /* to user GS */
+       popq    %rdi                            /* Restore user RDI */
+
        movq    %rax, %rsp
        UNWIND_HINT_IRET_REGS offset=8
 
@@ -829,7 +954,35 @@ apicinterrupt IRQ_WORK_VECTOR                      irq_work_interrupt              smp_irq_work_interrupt
 /*
  * Exception entry points.
  */
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+
+/*
+ * Switch to the thread stack.  This is called with the IRET frame and
+ * orig_ax on the stack.  (That is, RDI..R12 are not on the stack and
+ * space has not been allocated for them.)
+ */
+ENTRY(switch_to_thread_stack)
+       UNWIND_HINT_FUNC
+
+       pushq   %rdi
+       /* Need to switch before accessing the thread stack. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+       movq    %rsp, %rdi
+       movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+       UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
+
+       pushq   7*8(%rdi)               /* regs->ss */
+       pushq   6*8(%rdi)               /* regs->rsp */
+       pushq   5*8(%rdi)               /* regs->eflags */
+       pushq   4*8(%rdi)               /* regs->cs */
+       pushq   3*8(%rdi)               /* regs->ip */
+       pushq   2*8(%rdi)               /* regs->orig_ax */
+       pushq   8(%rdi)                 /* return address */
+       UNWIND_HINT_FUNC
+
+       movq    (%rdi), %rdi
+       ret
+END(switch_to_thread_stack)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -848,11 +1001,12 @@ ENTRY(\sym)
 
        ALLOC_PT_GPREGS_ON_STACK
 
-       .if \paranoid
-       .if \paranoid == 1
+       .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
-       jnz     1f
+       jnz     .Lfrom_usermode_switch_stack_\@
        .endif
+
+       .if \paranoid
        call    paranoid_entry
        .else
        call    error_entry
@@ -894,20 +1048,15 @@ ENTRY(\sym)
        jmp     error_exit
        .endif
 
-       .if \paranoid == 1
+       .if \paranoid < 2
        /*
-        * Paranoid entry from userspace.  Switch stacks and treat it
+        * Entry from userspace.  Switch stacks and treat it
         * as a normal entry.  This means that paranoid handlers
         * run in real process context if user_mode(regs).
         */
-1:
+.Lfrom_usermode_switch_stack_\@:
        call    error_entry
 
-
-       movq    %rsp, %rdi                      /* pt_regs pointer */
-       call    sync_regs
-       movq    %rax, %rsp                      /* switch stack */
-
        movq    %rsp, %rdi                      /* pt_regs pointer */
 
        .if \has_error_code
@@ -1119,7 +1268,11 @@ ENTRY(paranoid_entry)
        js      1f                              /* negative -> in kernel */
        SWAPGS
        xorl    %ebx, %ebx
-1:     ret
+
+1:
+       SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+       ret
 END(paranoid_entry)
 
 /*
@@ -1141,6 +1294,7 @@ ENTRY(paranoid_exit)
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     .Lparanoid_exit_no_swapgs
        TRACE_IRQS_IRETQ
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
        SWAPGS_UNSAFE_STACK
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1168,8 +1322,18 @@ ENTRY(error_entry)
         * from user mode due to an IRET fault.
         */
        SWAPGS
+       /* We have user CR3.  Change to kernel CR3. */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
+       /* Put us onto the real thread stack. */
+       popq    %r12                            /* save return addr in %12 */
+       movq    %rsp, %rdi                      /* arg0 = pt_regs pointer */
+       call    sync_regs
+       movq    %rax, %rsp                      /* switch stack */
+       ENCODE_FRAME_POINTER
+       pushq   %r12
+
        /*
         * We need to tell lockdep that IRQs are off.  We can't do this until
         * we fix gsbase, and we should do it before enter_from_user_mode
@@ -1206,6 +1370,7 @@ ENTRY(error_entry)
         * .Lgs_change's error handler with kernel gsbase.
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
        jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1215,10 +1380,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
        /*
-        * We came from an IRET to user mode, so we have user gsbase.
-        * Switch to kernel gsbase:
+        * We came from an IRET to user mode, so we have user
+        * gsbase and CR3.  Switch to kernel gsbase and CR3:
         */
        SWAPGS
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
        /*
         * Pretend that the exception came from user mode: set up pt_regs
@@ -1250,6 +1416,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *     %r14: Used to save/restore the CR3 of the interrupted context
+ *           when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
        UNWIND_HINT_IRET_REGS
@@ -1313,6 +1483,7 @@ ENTRY(nmi)
 
        swapgs
        cld
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
        movq    %rsp, %rdx
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
        UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1565,6 +1736,8 @@ end_repeat_nmi:
        movq    $-1, %rsi
        call    do_nmi
 
+       RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
        testl   %ebx, %ebx                      /* swapgs needed? */
        jnz     nmi_restore
 nmi_swapgs:
index 568e130d932cd2a7d44393e5fc52408cffe64f34..40f17009ec20cd5e4eff6147aa0468232d47e096 100644 (file)
  */
 ENTRY(entry_SYSENTER_compat)
        /* Interrupts are off on entry. */
-       SWAPGS_UNSAFE_STACK
+       SWAPGS
+
+       /* We are about to clobber %rsp anyway, clobbering here is OK */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
        /*
@@ -215,6 +219,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   $0                      /* pt_regs->r14 = 0 */
        pushq   $0                      /* pt_regs->r15 = 0 */
 
+       /*
+        * We just saved %rdi so it is safe to clobber.  It is not
+        * preserved during the C calls inside TRACE_IRQS_OFF anyway.
+        */
+       SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
         * turned them off.
@@ -256,10 +266,22 @@ sysret32_from_system_call:
         * when the system call started, which is already known to user
         * code.  We zero R8-R10 to avoid info leaks.
          */
+       movq    RSP-ORIG_RAX(%rsp), %rsp
+
+       /*
+        * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+        * on the process stack which is not mapped to userspace and
+        * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+        * switch until after after the last reference to the process
+        * stack.
+        *
+        * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+        */
+       SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
        xorq    %r8, %r8
        xorq    %r9, %r9
        xorq    %r10, %r10
-       movq    RSP-ORIG_RAX(%rsp), %rsp
        swapgs
        sysretl
 END(entry_SYSCALL_compat)
@@ -306,8 +328,11 @@ ENTRY(entry_INT80_compat)
         */
        movl    %eax, %eax
 
-       /* Construct struct pt_regs on stack (iret frame is already on stack) */
        pushq   %rax                    /* pt_regs->orig_ax */
+
+       /* switch to thread stack expects orig_ax to be pushed */
+       call    switch_to_thread_stack
+
        pushq   %rdi                    /* pt_regs->di */
        pushq   %rsi                    /* pt_regs->si */
        pushq   %rdx                    /* pt_regs->dx */
index f279ba2643dc8933b9659242082e7ef2ea2d9dd6..577fa8adb785baf5ea1c993a2bbc88adf43fbbcc 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
 #include <asm/traps.h>
+#include <asm/paravirt.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
@@ -138,6 +139,10 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 
        WARN_ON_ONCE(address != regs->ip);
 
+       /* This should be unreachable in NATIVE mode. */
+       if (WARN_ON(vsyscall_mode == NATIVE))
+               return false;
+
        if (vsyscall_mode == NONE) {
                warn_bad_vsyscall(KERN_INFO, regs,
                                  "vsyscall attempted with vsyscall=none");
@@ -329,16 +334,47 @@ int in_gate_area_no_mm(unsigned long addr)
        return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
+/*
+ * The VSYSCALL page is the only user-accessible page in the kernel address
+ * range.  Normally, the kernel page tables can have _PAGE_USER clear, but
+ * the tables covering VSYSCALL_ADDR need _PAGE_USER set if vsyscalls
+ * are enabled.
+ *
+ * Some day we may create a "minimal" vsyscall mode in which we emulate
+ * vsyscalls but leave the page not present.  If so, we skip calling
+ * this.
+ */
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
+       set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
+       p4d = p4d_offset(pgd, VSYSCALL_ADDR);
+#if CONFIG_PGTABLE_LEVELS >= 5
+       p4d->p4d |= _PAGE_USER;
+#endif
+       pud = pud_offset(p4d, VSYSCALL_ADDR);
+       set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
+       pmd = pmd_offset(pud, VSYSCALL_ADDR);
+       set_pmd(pmd, __pmd(pmd_val(*pmd) | _PAGE_USER));
+}
+
 void __init map_vsyscall(void)
 {
        extern char __vsyscall_page;
        unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-       if (vsyscall_mode != NONE)
+       if (vsyscall_mode != NONE) {
                __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
                             vsyscall_mode == NATIVE
                             ? PAGE_KERNEL_VSYSCALL
                             : PAGE_KERNEL_VVAR);
+               set_vsyscall_pgtable_user_bits(swapper_pg_dir);
+       }
 
        BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
                     (unsi